MimeMagic.php 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. <?php
  2. /** Module defining helper functions for detecting and dealing with mime types.
  3. *
  4. */
  5. /** Defines a set of well known mime types
  6. * This is used as a fallback to mime.types files.
  7. * An extensive list of well known mime types is provided by
  8. * the file mime.types in the includes directory.
  9. */
  10. define('MM_WELL_KNOWN_MIME_TYPES',<<<END_STRING
  11. application/ogg ogg ogm ogv
  12. application/pdf pdf
  13. application/vnd.oasis.opendocument.chart odc
  14. application/vnd.oasis.opendocument.chart-template otc
  15. application/vnd.oasis.opendocument.formula odf
  16. application/vnd.oasis.opendocument.formula-template otf
  17. application/vnd.oasis.opendocument.graphics odg
  18. application/vnd.oasis.opendocument.graphics-template otg
  19. application/vnd.oasis.opendocument.image odi
  20. application/vnd.oasis.opendocument.image-template oti
  21. application/vnd.oasis.opendocument.presentation odp
  22. application/vnd.oasis.opendocument.presentation-template otp
  23. application/vnd.oasis.opendocument.spreadsheet ods
  24. application/vnd.oasis.opendocument.spreadsheet-template ots
  25. application/vnd.oasis.opendocument.text odt
  26. application/vnd.oasis.opendocument.text-template ott
  27. application/vnd.oasis.opendocument.text-master otm
  28. application/vnd.oasis.opendocument.text-web oth
  29. application/x-javascript js
  30. application/x-shockwave-flash swf
  31. audio/midi mid midi kar
  32. audio/mpeg mpga mpa mp2 mp3
  33. audio/x-aiff aif aiff aifc
  34. audio/x-wav wav
  35. audio/ogg ogg
  36. image/x-bmp bmp
  37. image/gif gif
  38. image/jpeg jpeg jpg jpe
  39. image/png png
  40. image/svg+xml image/svg svg
  41. image/tiff tiff tif
  42. image/vnd.djvu image/x.djvu image/x-djvu djvu
  43. image/x-portable-pixmap ppm
  44. image/x-xcf xcf
  45. text/plain txt
  46. text/html html htm
  47. video/ogg ogm ogg ogv
  48. video/mpeg mpg mpeg
  49. END_STRING
  50. );
/**
 * Built-in table of well known mime info entries.
 *
 * Each line lists one or more mime types followed by a media type in square
 * brackets. The MimeMagic constructor files every mime type of a line under
 * that media type, and registers the second and following mime types of the
 * line as aliases of the first one.
 *
 * This is used as a fallback to mime.info files.
 * An extensive list of well known mime types is provided by
 * the file mime.info in the includes directory.
 */
define('MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);
  97. #note: because this file is possibly included by a function,
  98. #we need to access the global scope explicitely!
  99. global $wgLoadFileinfoExtension;
  100. if ($wgLoadFileinfoExtension) {
  101. if(!extension_loaded('fileinfo')) dl('fileinfo.' . PHP_SHLIB_SUFFIX);
  102. }
/**
 * Implements functions related to mime types such as detection and mapping to
 * file extension.
 *
 * Instances of this class are stateless, so there only needs to be one global
 * instance of MimeMagic. Please use MimeMagic::singleton() to get that instance.
 */
  110. class MimeMagic {
  111. /**
  112. * Mapping of media types to arrays of mime types.
  113. * This is used by findMediaType and getMediaType, respectively
  114. */
  115. var $mMediaTypes= NULL;
  116. /** Map of mime type aliases
  117. */
  118. var $mMimeTypeAliases= NULL;
/** Map of mime types to file extensions (as a space separated list)
 */
  121. var $mMimeToExt= NULL;
/** Map of file extensions to mime types (as a space separated list)
 */
  124. var $mExtToMime= NULL;
  125. /** IEContentAnalyzer instance
  126. */
  127. var $mIEAnalyzer;
  128. /** The singleton instance
  129. */
  130. private static $instance;
  131. /** Initializes the MimeMagic object. This is called by MimeMagic::singleton().
  132. *
  133. * This constructor parses the mime.types and mime.info files and build internal mappings.
  134. */
  135. function __construct() {
  136. /*
  137. * --- load mime.types ---
  138. */
  139. global $wgMimeTypeFile, $IP;
  140. $types = MM_WELL_KNOWN_MIME_TYPES;
  141. if ( $wgMimeTypeFile == 'includes/mime.types' ) {
  142. $wgMimeTypeFile = "$IP/$wgMimeTypeFile";
  143. }
  144. if ( $wgMimeTypeFile ) {
  145. if ( is_file( $wgMimeTypeFile ) and is_readable( $wgMimeTypeFile ) ) {
  146. wfDebug( __METHOD__.": loading mime types from $wgMimeTypeFile\n" );
  147. $types .= "\n";
  148. $types .= file_get_contents( $wgMimeTypeFile );
  149. } else {
  150. wfDebug( __METHOD__.": can't load mime types from $wgMimeTypeFile\n" );
  151. }
  152. } else {
  153. wfDebug( __METHOD__.": no mime types file defined, using build-ins only.\n" );
  154. }
  155. $types = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $types );
  156. $types = str_replace( "\t", " ", $types );
  157. $this->mMimeToExt = array();
  158. $this->mToMime = array();
  159. $lines = explode( "\n",$types );
  160. foreach ( $lines as $s ) {
  161. $s = trim( $s );
  162. if ( empty( $s ) ) continue;
  163. if ( strpos( $s, '#' ) === 0 ) continue;
  164. $s = strtolower( $s );
  165. $i = strpos( $s, ' ' );
  166. if ( $i === false ) continue;
  167. #print "processing MIME line $s<br>";
  168. $mime = substr( $s, 0, $i );
  169. $ext = trim( substr($s, $i+1 ) );
  170. if ( empty( $ext ) ) continue;
  171. if ( !empty( $this->mMimeToExt[$mime] ) ) {
  172. $this->mMimeToExt[$mime] .= ' ' . $ext;
  173. } else {
  174. $this->mMimeToExt[$mime] = $ext;
  175. }
  176. $extensions = explode( ' ', $ext );
  177. foreach ( $extensions as $e ) {
  178. $e = trim( $e );
  179. if ( empty( $e ) ) continue;
  180. if ( !empty( $this->mExtToMime[$e] ) ) {
  181. $this->mExtToMime[$e] .= ' ' . $mime;
  182. } else {
  183. $this->mExtToMime[$e] = $mime;
  184. }
  185. }
  186. }
  187. /*
  188. * --- load mime.info ---
  189. */
  190. global $wgMimeInfoFile;
  191. if ( $wgMimeInfoFile == 'includes/mime.info' ) {
  192. $wgMimeInfoFile = "$IP/$wgMimeInfoFile";
  193. }
  194. $info = MM_WELL_KNOWN_MIME_INFO;
  195. if ( $wgMimeInfoFile ) {
  196. if ( is_file( $wgMimeInfoFile ) and is_readable( $wgMimeInfoFile ) ) {
  197. wfDebug( __METHOD__.": loading mime info from $wgMimeInfoFile\n" );
  198. $info .= "\n";
  199. $info .= file_get_contents( $wgMimeInfoFile );
  200. } else {
  201. wfDebug(__METHOD__.": can't load mime info from $wgMimeInfoFile\n");
  202. }
  203. } else {
  204. wfDebug(__METHOD__.": no mime info file defined, using build-ins only.\n");
  205. }
  206. $info = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $info);
  207. $info = str_replace( "\t", " ", $info );
  208. $this->mMimeTypeAliases = array();
  209. $this->mMediaTypes = array();
  210. $lines = explode( "\n", $info );
  211. foreach ( $lines as $s ) {
  212. $s = trim( $s );
  213. if ( empty( $s ) ) continue;
  214. if ( strpos( $s, '#' ) === 0 ) continue;
  215. $s = strtolower( $s );
  216. $i = strpos( $s, ' ' );
  217. if ( $i === false ) continue;
  218. #print "processing MIME INFO line $s<br>";
  219. $match = array();
  220. if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
  221. $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
  222. $mtype = trim( strtoupper( $match[1] ) );
  223. } else {
  224. $mtype = MEDIATYPE_UNKNOWN;
  225. }
  226. $m = explode( ' ', $s );
  227. if ( !isset( $this->mMediaTypes[$mtype] ) ) {
  228. $this->mMediaTypes[$mtype] = array();
  229. }
  230. foreach ( $m as $mime ) {
  231. $mime = trim( $mime );
  232. if ( empty( $mime ) ) continue;
  233. $this->mMediaTypes[$mtype][] = $mime;
  234. }
  235. if ( sizeof( $m ) > 1 ) {
  236. $main = $m[0];
  237. for ( $i=1; $i<sizeof($m); $i += 1 ) {
  238. $mime = $m[$i];
  239. $this->mMimeTypeAliases[$mime] = $main;
  240. }
  241. }
  242. }
  243. }
  244. /**
  245. * Get an instance of this class
  246. */
  247. static function &singleton() {
  248. if ( !isset( self::$instance ) ) {
  249. self::$instance = new MimeMagic;
  250. }
  251. return self::$instance;
  252. }
  253. /** returns a list of file extensions for a given mime type
  254. * as a space separated string.
  255. */
  256. function getExtensionsForType( $mime ) {
  257. $mime = strtolower( $mime );
  258. $r = @$this->mMimeToExt[$mime];
  259. if ( @!$r and isset( $this->mMimeTypeAliases[$mime] ) ) {
  260. $mime = $this->mMimeTypeAliases[$mime];
  261. $r = @$this->mMimeToExt[$mime];
  262. }
  263. return $r;
  264. }
  265. /** returns a list of mime types for a given file extension
  266. * as a space separated string.
  267. */
  268. function getTypesForExtension( $ext ) {
  269. $ext = strtolower( $ext );
  270. $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null;
  271. return $r;
  272. }
  273. /** returns a single mime type for a given file extension.
  274. * This is always the first type from the list returned by getTypesForExtension($ext).
  275. */
  276. function guessTypesForExtension( $ext ) {
  277. $m = $this->getTypesForExtension( $ext );
  278. if ( is_null( $m ) ) return NULL;
  279. $m = trim( $m );
  280. $m = preg_replace( '/\s.*$/', '', $m );
  281. return $m;
  282. }
  283. /** tests if the extension matches the given mime type.
  284. * returns true if a match was found, NULL if the mime type is unknown,
  285. * and false if the mime type is known but no matches where found.
  286. */
  287. function isMatchingExtension( $extension, $mime ) {
  288. $ext = $this->getExtensionsForType( $mime );
  289. if ( !$ext ) {
  290. return NULL; //unknown
  291. }
  292. $ext = explode( ' ', $ext );
  293. $extension = strtolower( $extension );
  294. if ( in_array( $extension, $ext ) ) {
  295. return true;
  296. }
  297. return false;
  298. }
  299. /** returns true if the mime type is known to represent
  300. * an image format supported by the PHP GD library.
  301. */
  302. function isPHPImageType( $mime ) {
  303. #as defined by imagegetsize and image_type_to_mime
  304. static $types = array(
  305. 'image/gif', 'image/jpeg', 'image/png',
  306. 'image/x-bmp', 'image/xbm', 'image/tiff',
  307. 'image/jp2', 'image/jpeg2000', 'image/iff',
  308. 'image/xbm', 'image/x-xbitmap',
  309. 'image/vnd.wap.wbmp', 'image/vnd.xiff',
  310. 'image/x-photoshop',
  311. 'application/x-shockwave-flash',
  312. );
  313. return in_array( $mime, $types );
  314. }
  315. /**
  316. * Returns true if the extension represents a type which can
  317. * be reliably detected from its content. Use this to determine
  318. * whether strict content checks should be applied to reject
  319. * invalid uploads; if we can't identify the type we won't
  320. * be able to say if it's invalid.
  321. *
  322. * @todo Be more accurate when using fancy mime detector plugins;
  323. * right now this is the bare minimum getimagesize() list.
  324. * @return bool
  325. */
  326. function isRecognizableExtension( $extension ) {
  327. static $types = array(
  328. // Types recognized by getimagesize()
  329. 'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
  330. 'bmp', 'tiff', 'tif', 'jpc', 'jp2',
  331. 'jpx', 'jb2', 'swc', 'iff', 'wbmp',
  332. 'xbm',
  333. // Formats we recognize magic numbers for
  334. 'djvu', 'ogg', 'ogv', 'mid', 'pdf', 'wmf', 'xcf',
  335. // XML formats we sure hope we recognize reliably
  336. 'svg',
  337. );
  338. return in_array( strtolower( $extension ), $types );
  339. }
  340. /** mime type detection. This uses detectMimeType to detect the mime type of the file,
  341. * but applies additional checks to determine some well known file formats that may be missed
  342. * or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG).
  343. *
  344. * @param string $file The file to check
  345. * @param mixed $ext The file extension, or true to extract it from the filename.
  346. * Set it to false to ignore the extension.
  347. *
  348. * @return string the mime type of $file
  349. */
  350. function guessMimeType( $file, $ext = true ) {
  351. $mime = $this->doGuessMimeType( $file, $ext );
  352. if( !$mime ) {
  353. wfDebug( __METHOD__.": internal type detection failed for $file (.$ext)...\n" );
  354. $mime = $this->detectMimeType( $file, $ext );
  355. }
  356. if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
  357. $mime = $this->mMimeTypeAliases[$mime];
  358. }
  359. wfDebug(__METHOD__.": final mime type of $file: $mime\n");
  360. return $mime;
  361. }
  362. function doGuessMimeType( $file, $ext = true ) {
  363. // Read a chunk of the file
  364. wfSuppressWarnings();
  365. $f = fopen( $file, "rt" );
  366. wfRestoreWarnings();
  367. if( !$f ) return "unknown/unknown";
  368. $head = fread( $f, 1024 );
  369. fseek( $f, -65558, SEEK_END );
  370. $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR
  371. fclose( $f );
  372. // Hardcode a few magic number checks...
  373. $headers = array(
  374. // Multimedia...
  375. 'MThd' => 'audio/midi',
  376. 'OggS' => 'application/ogg',
  377. // Image formats...
  378. // Note that WMF may have a bare header, no magic number.
  379. "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
  380. "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
  381. '%PDF' => 'application/pdf',
  382. 'gimp xcf' => 'image/x-xcf',
  383. // Some forbidden fruit...
  384. 'MZ' => 'application/octet-stream', // DOS/Windows executable
  385. "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
  386. "\x7fELF" => 'application/octet-stream', // ELF binary
  387. );
  388. foreach( $headers as $magic => $candidate ) {
  389. if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
  390. wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
  391. return $candidate;
  392. }
  393. }
  394. /*
  395. * look for PHP
  396. * Check for this before HTML/XML...
  397. * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before.
  398. * It will also match text files which could be PHP. :)
  399. */
  400. if( ( strpos( $head, '<?php' ) !== false ) ||
  401. ( strpos( $head, '<? ' ) !== false ) ||
  402. ( strpos( $head, "<?\n" ) !== false ) ||
  403. ( strpos( $head, "<?\t" ) !== false ) ||
  404. ( strpos( $head, "<?=" ) !== false ) ||
  405. ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
  406. ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
  407. ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
  408. ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
  409. ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
  410. wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
  411. return "application/x-php";
  412. }
  413. /*
  414. * look for XML formats (XHTML and SVG)
  415. */
  416. $xml = new XmlTypeCheck( $file );
  417. if( $xml->wellFormed ) {
  418. global $wgXMLMimeTypes;
  419. if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
  420. return $wgXMLMimeTypes[$xml->getRootElement()];
  421. } else {
  422. return 'application/xml';
  423. }
  424. }
  425. /*
  426. * look for shell scripts
  427. */
  428. $script_type = NULL;
  429. # detect by shebang
  430. if ( substr( $head, 0, 2) == "#!" ) {
  431. $script_type = "ASCII";
  432. } elseif ( substr( $head, 0, 5) == "\xef\xbb\xbf#!" ) {
  433. $script_type = "UTF-8";
  434. } elseif ( substr( $head, 0, 7) == "\xfe\xff\x00#\x00!" ) {
  435. $script_type = "UTF-16BE";
  436. } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) {
  437. $script_type= "UTF-16LE";
  438. }
  439. if ( $script_type ) {
  440. if ( $script_type !== "UTF-8" && $script_type !== "ASCII") {
  441. // Quick and dirty fold down to ASCII!
  442. $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
  443. $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
  444. $head = '';
  445. foreach( $chars as $codepoint ) {
  446. if( $codepoint < 128 ) {
  447. $head .= chr( $codepoint );
  448. } else {
  449. $head .= '?';
  450. }
  451. }
  452. }
  453. $match = array();
  454. if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
  455. $mime = "application/x-{$match[2]}";
  456. wfDebug( __METHOD__.": shell script recognized as $mime\n" );
  457. return $mime;
  458. }
  459. }
  460. // Check for ZIP (before getimagesize)
  461. if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
  462. wfDebug( __METHOD__.": ZIP header present at end of $file\n" );
  463. return $this->detectZipType( $head );
  464. }
  465. wfSuppressWarnings();
  466. $gis = getimagesize( $file );
  467. wfRestoreWarnings();
  468. if( $gis && isset( $gis['mime'] ) ) {
  469. $mime = $gis['mime'];
  470. wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" );
  471. return $mime;
  472. }
  473. // Also test DjVu
  474. $deja = new DjVuImage( $file );
  475. if( $deja->isValid() ) {
  476. wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" );
  477. return 'image/vnd.djvu';
  478. }
  479. return false;
  480. }
  481. /**
  482. * Detect application-specific file type of a given ZIP file from its
  483. * header data. Currently works for OpenDocument types...
  484. * If can't tell, returns 'application/zip'.
  485. *
  486. * @param string $header Some reasonably-sized chunk of file header
  487. * @return string
  488. */
  489. function detectZipType( $header ) {
  490. $opendocTypes = array(
  491. 'chart-template',
  492. 'chart',
  493. 'formula-template',
  494. 'formula',
  495. 'graphics-template',
  496. 'graphics',
  497. 'image-template',
  498. 'image',
  499. 'presentation-template',
  500. 'presentation',
  501. 'spreadsheet-template',
  502. 'spreadsheet',
  503. 'text-template',
  504. 'text-master',
  505. 'text-web',
  506. 'text' );
  507. // http://lists.oasis-open.org/archives/office/200505/msg00006.html
  508. $types = '(?:' . implode( '|', $opendocTypes ) . ')';
  509. $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
  510. wfDebug( __METHOD__.": $opendocRegex\n" );
  511. if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
  512. $mime = $matches[1];
  513. wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
  514. return $mime;
  515. } else {
  516. wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
  517. return 'application/zip';
  518. }
  519. }
  520. /** Internal mime type detection, please use guessMimeType() for application code instead.
  521. * Detection is done using an external program, if $wgMimeDetectorCommand is set.
  522. * Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available.
  523. * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using
  524. * guessTypesForExtension.
  525. * If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image.
  526. * If no mime type can be determined, this function returns "unknown/unknown".
  527. *
  528. * @param string $file The file to check
  529. * @param mixed $ext The file extension, or true to extract it from the filename.
  530. * Set it to false to ignore the extension.
  531. *
  532. * @return string the mime type of $file
  533. * @access private
  534. */
  535. function detectMimeType( $file, $ext = true ) {
  536. global $wgMimeDetectorCommand;
  537. $m = NULL;
  538. if ( $wgMimeDetectorCommand ) {
  539. $fn = wfEscapeShellArg( $file );
  540. $m = `$wgMimeDetectorCommand $fn`;
  541. } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
  542. # This required the fileinfo extension by PECL,
  543. # see http://pecl.php.net/package/fileinfo
  544. # This must be compiled into PHP
  545. #
  546. # finfo is the official replacement for the deprecated
  547. # mime_content_type function, see below.
  548. #
  549. # If you may need to load the fileinfo extension at runtime, set
  550. # $wgLoadFileinfoExtension in LocalSettings.php
  551. $mime_magic_resource = finfo_open(FILEINFO_MIME); /* return mime type ala mimetype extension */
  552. if ($mime_magic_resource) {
  553. $m = finfo_file( $mime_magic_resource, $file );
  554. finfo_close( $mime_magic_resource );
  555. } else {
  556. wfDebug( __METHOD__.": finfo_open failed on ".FILEINFO_MIME."!\n" );
  557. }
  558. } elseif ( function_exists( "mime_content_type" ) ) {
  559. # NOTE: this function is available since PHP 4.3.0, but only if
  560. # PHP was compiled with --with-mime-magic or, before 4.3.2, with --enable-mime-magic.
  561. #
  562. # On Windows, you must set mime_magic.magicfile in php.ini to point to the mime.magic file bundeled with PHP;
  563. # sometimes, this may even be needed under linus/unix.
  564. #
  565. # Also note that this has been DEPRECATED in favor of the fileinfo extension by PECL, see above.
  566. # see http://www.php.net/manual/en/ref.mime-magic.php for details.
  567. $m = mime_content_type($file);
  568. } else {
  569. wfDebug( __METHOD__.": no magic mime detector found!\n" );
  570. }
  571. if ( $m ) {
  572. # normalize
  573. $m = preg_replace( '![;, ].*$!', '', $m ); #strip charset, etc
  574. $m = trim( $m );
  575. $m = strtolower( $m );
  576. if ( strpos( $m, 'unknown' ) !== false ) {
  577. $m = NULL;
  578. } else {
  579. wfDebug( __METHOD__.": magic mime type of $file: $m\n" );
  580. return $m;
  581. }
  582. }
  583. # if desired, look at extension as a fallback.
  584. if ( $ext === true ) {
  585. $i = strrpos( $file, '.' );
  586. $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
  587. }
  588. if ( $ext ) {
  589. if( $this->isRecognizableExtension( $ext ) ) {
  590. wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, we should have recognized it\n" );
  591. } else {
  592. $m = $this->guessTypesForExtension( $ext );
  593. if ( $m ) {
  594. wfDebug( __METHOD__.": extension mime type of $file: $m\n" );
  595. return $m;
  596. }
  597. }
  598. }
  599. #unknown type
  600. wfDebug( __METHOD__.": failed to guess mime type for $file!\n" );
  601. return "unknown/unknown";
  602. }
  603. /**
  604. * Determine the media type code for a file, using its mime type, name and possibly
  605. * its contents.
  606. *
  607. * This function relies on the findMediaType(), mapping extensions and mime
  608. * types to media types.
  609. *
  610. * @todo analyse file if need be
  611. * @todo look at multiple extension, separately and together.
  612. *
  613. * @param string $path full path to the image file, in case we have to look at the contents
  614. * (if null, only the mime type is used to determine the media type code).
  615. * @param string $mime mime type. If null it will be guessed using guessMimeType.
  616. *
  617. * @return (int?string?) a value to be used with the MEDIATYPE_xxx constants.
  618. */
  619. function getMediaType( $path = NULL, $mime = NULL ) {
  620. if( !$mime && !$path ) return MEDIATYPE_UNKNOWN;
  621. # If mime type is unknown, guess it
  622. if( !$mime ) $mime = $this->guessMimeType( $path, false );
  623. # Special code for ogg - detect if it's video (theora),
  624. # else label it as sound.
  625. if( $mime == "application/ogg" && file_exists( $path ) ) {
  626. // Read a chunk of the file
  627. $f = fopen( $path, "rt" );
  628. if ( !$f ) return MEDIATYPE_UNKNOWN;
  629. $head = fread( $f, 256 );
  630. fclose( $f );
  631. $head = strtolower( $head );
  632. # This is an UGLY HACK, file should be parsed correctly
  633. if ( strpos( $head, 'theora' ) !== false ) return MEDIATYPE_VIDEO;
  634. elseif ( strpos( $head, 'vorbis' ) !== false ) return MEDIATYPE_AUDIO;
  635. elseif ( strpos( $head, 'flac' ) !== false ) return MEDIATYPE_AUDIO;
  636. elseif ( strpos( $head, 'speex' ) !== false ) return MEDIATYPE_AUDIO;
  637. else return MEDIATYPE_MULTIMEDIA;
  638. }
  639. # check for entry for full mime type
  640. if( $mime ) {
  641. $type = $this->findMediaType( $mime );
  642. if( $type !== MEDIATYPE_UNKNOWN ) return $type;
  643. }
  644. # Check for entry for file extension
  645. $e = NULL;
  646. if ( $path ) {
  647. $i = strrpos( $path, '.' );
  648. $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
  649. # TODO: look at multi-extension if this fails, parse from full path
  650. $type = $this->findMediaType( '.' . $e );
  651. if ( $type !== MEDIATYPE_UNKNOWN ) return $type;
  652. }
  653. # Check major mime type
  654. if( $mime ) {
  655. $i = strpos( $mime, '/' );
  656. if( $i !== false ) {
  657. $major = substr( $mime, 0, $i );
  658. $type = $this->findMediaType( $major );
  659. if( $type !== MEDIATYPE_UNKNOWN ) return $type;
  660. }
  661. }
  662. if( !$type ) $type = MEDIATYPE_UNKNOWN;
  663. return $type;
  664. }
  665. /** returns a media code matching the given mime type or file extension.
  666. * File extensions are represented by a string starting with a dot (.) to
  667. * distinguish them from mime types.
  668. *
  669. * This funktion relies on the mapping defined by $this->mMediaTypes
  670. * @access private
  671. */
  672. function findMediaType( $extMime ) {
  673. if ( strpos( $extMime, '.' ) === 0 ) { #if it's an extension, look up the mime types
  674. $m = $this->getTypesForExtension( substr( $extMime, 1 ) );
  675. if ( !$m ) return MEDIATYPE_UNKNOWN;
  676. $m = explode( ' ', $m );
  677. } else {
  678. # Normalize mime type
  679. if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
  680. $extMime = $this->mMimeTypeAliases[$extMime];
  681. }
  682. $m = array($extMime);
  683. }
  684. foreach ( $m as $mime ) {
  685. foreach ( $this->mMediaTypes as $type => $codes ) {
  686. if ( in_array($mime, $codes, true ) ) {
  687. return $type;
  688. }
  689. }
  690. }
  691. return MEDIATYPE_UNKNOWN;
  692. }
  693. /**
  694. * Get the MIME types that various versions of Internet Explorer would
  695. * detect from a chunk of the content.
  696. *
  697. * @param string $fileName The file name (unused at present)
  698. * @param string $chunk The first 256 bytes of the file
  699. * @param string $proposed The MIME type proposed by the server
  700. */
  701. public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
  702. $ca = $this->getIEContentAnalyzer();
  703. return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
  704. }
  705. /**
  706. * Get a cached instance of IEContentAnalyzer
  707. */
  708. protected function getIEContentAnalyzer() {
  709. if ( is_null( $this->mIEAnalyzer ) ) {
  710. $this->mIEAnalyzer = new IEContentAnalyzer;
  711. }
  712. return $this->mIEAnalyzer;
  713. }
  714. }