PNGMetadataExtractor.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. <?php
  2. /**
  3. * PNG frame counter and metadata extractor.
  4. *
  5. * Slightly derived from GIFMetadataExtractor.php
  6. * Deliberately not using MWExceptions to avoid external dependencies, encouraging
  7. * redistribution.
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22. * http://www.gnu.org/copyleft/gpl.html
  23. *
  24. * @file
  25. * @ingroup Media
  26. */
  /**
   * PNG frame counter and metadata extractor.
   *
   * Walks the chunk stream of a PNG/APNG file and collects header information
   * (IHDR), animation information (acTL, fcTL) and textual or physical
   * metadata (iTXt, tEXt, zTXt, tIME, pHYs). All other chunks are skipped.
   *
   * @ingroup Media
   */
  class PNGMetadataExtractor {
      /** @var string The 8-byte PNG file signature */
      private static $pngSig;

      /** @var int Size in bytes of the CRC that trails every chunk */
      private static $crcSize;

      /** @var array Map of lower-cased PNG text keywords to metadata property names */
      private static $textChunks;

      const VERSION = 1;
      const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

      /**
       * Extract frame, colour and text metadata from a PNG file.
       *
       * The file handle is always closed before this method returns or throws.
       *
       * @param string $filename Path of the PNG file to inspect
       * @throws Exception If the file is missing, cannot be opened, does not start
       *   with the PNG signature, or contains a chunk that is truncated, malformed
       *   or (for chunks whose payload is read) larger than MAX_CHUNK_SIZE
       * @return array Array with the keys 'frameCount', 'loopCount', 'duration',
       *   'text', 'bitDepth' and 'colorType'
       */
      public static function getMetadata( $filename ) {
          self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
          self::$crcSize = 4;
          /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
           * and https://www.w3.org/TR/PNG/#11keywords
           */
          self::$textChunks = [
              'xml:com.adobe.xmp' => 'xmp',
              # Artist is unofficial. Author is the recommended
              # keyword in the PNG spec. However some people output
              # Artist so support both.
              'artist' => 'Artist',
              'model' => 'Model',
              'make' => 'Make',
              'author' => 'Artist',
              'comment' => 'PNGFileComment',
              'description' => 'ImageDescription',
              'title' => 'ObjectName',
              'copyright' => 'Copyright',
              # Source as in original device used to make image
              # not as in who gave you the image
              'source' => 'Model',
              'software' => 'Software',
              'disclaimer' => 'Disclaimer',
              'warning' => 'ContentWarning',
              'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
              'label' => 'Label',
              'creation time' => 'DateTimeDigitized',
              /* Other potentially useful things - Document */
          ];

          $frameCount = 0;
          $loopCount = 1;
          $text = [];
          $duration = 0.0;
          $bitDepth = 0;
          $colorType = 'unknown';

          if ( !$filename ) {
              throw new Exception( __METHOD__ . ": No file name specified" );
          } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
              throw new Exception( __METHOD__ . ": File $filename does not exist" );
          }

          $fh = fopen( $filename, 'rb' );

          if ( !$fh ) {
              throw new Exception( __METHOD__ . ": Unable to open file $filename" );
          }

          // Everything that can throw while the handle is open lives inside the
          // try block so that the handle is released on every exit path.
          try {
              // Check for the PNG header
              $buf = fread( $fh, 8 );
              if ( $buf !== self::$pngSig ) {
                  throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
              }

              // Read chunks. Each chunk is: 4-byte length, 4-byte type, payload, CRC.
              while ( !feof( $fh ) ) {
                  $buf = fread( $fh, 4 );
                  if ( !$buf || strlen( $buf ) < 4 ) {
                      throw new Exception( __METHOD__ . ": Read error" );
                  }
                  $chunk_size = unpack( "N", $buf )[1];

                  if ( $chunk_size < 0 ) {
                      throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
                  }

                  $chunk_type = fread( $fh, 4 );
                  if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
                      throw new Exception( __METHOD__ . ": Read error" );
                  }

                  if ( $chunk_type === "IHDR" ) {
                      $buf = self::read( $fh, $chunk_size );
                      if ( !$buf || strlen( $buf ) < $chunk_size ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }
                      // Bytes 0-7 hold the image width and height, which are not
                      // part of the returned metadata; byte 8 is the bit depth.
                      $bitDepth = ord( substr( $buf, 8, 1 ) );
                      // Detect the color type in British English as per the spec
                      // https://www.w3.org/TR/PNG/#11IHDR
                      switch ( ord( substr( $buf, 9, 1 ) ) ) {
                          case 0:
                              $colorType = 'greyscale';
                              break;
                          case 2:
                              $colorType = 'truecolour';
                              break;
                          case 3:
                              $colorType = 'index-coloured';
                              break;
                          case 4:
                              $colorType = 'greyscale-alpha';
                              break;
                          case 6:
                              $colorType = 'truecolour-alpha';
                              break;
                          default:
                              $colorType = 'unknown';
                              break;
                      }
                  } elseif ( $chunk_type === "acTL" ) {
                      // The payload is unpacked as two 32-bit integers below, so
                      // anything shorter than 8 bytes cannot be decoded.
                      if ( $chunk_size < 8 ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }
                      $buf = self::read( $fh, $chunk_size );
                      if ( !$buf || strlen( $buf ) < $chunk_size ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }

                      $actl = unpack( "Nframes/Nplays", $buf );
                      $frameCount = $actl['frames'];
                      $loopCount = $actl['plays'];
                  } elseif ( $chunk_type === "fcTL" ) {
                      $buf = self::read( $fh, $chunk_size );
                      if ( !$buf || strlen( $buf ) < $chunk_size ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }
                      // The frame delay fraction starts at byte offset 20.
                      $buf = substr( $buf, 20 );
                      if ( strlen( $buf ) < 4 ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }

                      $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
                      // A zero denominator is treated as 100.
                      if ( $fctldur['delay_den'] == 0 ) {
                          $fctldur['delay_den'] = 100;
                      }
                      if ( $fctldur['delay_num'] ) {
                          $duration += $fctldur['delay_num'] / $fctldur['delay_den'];
                      }
                  } elseif ( $chunk_type === "iTXt" ) {
                      // Extracts iTXt chunks, uncompressing if necessary.
                      $buf = self::read( $fh, $chunk_size );
                      $items = [];
                      if ( preg_match(
                          '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
                          $buf, $items )
                      ) {
                          /* $items[1] = text chunk name, $items[2] = compressed flag,
                           * $items[3] = lang code (or ""), $items[5] = content.
                           * The byte after the compressed flag is matched as a
                           * literal \x00 by the pattern; $items[4] captures the
                           * single byte that follows the lang code.
                           */

                          // Theoretically should be case-sensitive, but in practise...
                          $items[1] = strtolower( $items[1] );
                          if ( !isset( self::$textChunks[$items[1]] ) ) {
                              // Only extract textual chunks on our list.
                              fseek( $fh, self::$crcSize, SEEK_CUR );
                              continue;
                          }

                          $items[3] = strtolower( $items[3] );
                          if ( $items[3] === '' ) {
                              // if no lang specified use x-default like in xmp.
                              $items[3] = 'x-default';
                          }

                          // if compressed
                          if ( $items[2] === "\x01" ) {
                              if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
                                  Wikimedia\suppressWarnings();
                                  $items[5] = gzuncompress( $items[5] );
                                  Wikimedia\restoreWarnings();

                                  if ( $items[5] === false ) {
                                      // decompression failed
                                      wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
                                      fseek( $fh, self::$crcSize, SEEK_CUR );
                                      continue;
                                  }
                              } else {
                                  wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
                                      . " or potentially invalid compression method\n" );
                                  fseek( $fh, self::$crcSize, SEEK_CUR );
                                  continue;
                              }
                          }
                          $finalKeyword = self::$textChunks[$items[1]];
                          $text[$finalKeyword][$items[3]] = $items[5];
                          $text[$finalKeyword]['_type'] = 'lang';
                      } else {
                          // Error reading iTXt chunk
                          throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
                      }
                  } elseif ( $chunk_type === 'tEXt' ) {
                      $buf = self::read( $fh, $chunk_size );

                      // In case there is no \x00 which will make explode fail.
                      if ( strpos( $buf, "\x00" ) === false ) {
                          throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
                      }

                      list( $keyword, $content ) = explode( "\x00", $buf, 2 );
                      if ( $keyword === '' || $content === '' ) {
                          throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
                      }

                      // Theoretically should be case-sensitive, but in practise...
                      $keyword = strtolower( $keyword );
                      if ( !isset( self::$textChunks[$keyword] ) ) {
                          // Don't recognize chunk, so skip.
                          fseek( $fh, self::$crcSize, SEEK_CUR );
                          continue;
                      }
                      Wikimedia\suppressWarnings();
                      $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
                      Wikimedia\restoreWarnings();

                      if ( $content === false ) {
                          throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
                      }

                      $finalKeyword = self::$textChunks[$keyword];
                      $text[$finalKeyword]['x-default'] = $content;
                      $text[$finalKeyword]['_type'] = 'lang';
                  } elseif ( $chunk_type === 'zTXt' ) {
                      if ( function_exists( 'gzuncompress' ) ) {
                          $buf = self::read( $fh, $chunk_size );

                          // In case there is no \x00 which will make explode fail.
                          if ( strpos( $buf, "\x00" ) === false ) {
                              throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
                          }

                          list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
                          if ( $keyword === '' || $postKeyword === '' ) {
                              throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
                          }
                          // Theoretically should be case-sensitive, but in practise...
                          $keyword = strtolower( $keyword );

                          if ( !isset( self::$textChunks[$keyword] ) ) {
                              // Don't recognize chunk, so skip.
                              fseek( $fh, self::$crcSize, SEEK_CUR );
                              continue;
                          }
                          // First byte after the keyword separator is the
                          // compression method; the rest is the compressed text.
                          $compression = substr( $postKeyword, 0, 1 );
                          $content = substr( $postKeyword, 1 );
                          if ( $compression !== "\x00" ) {
                              wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
                              fseek( $fh, self::$crcSize, SEEK_CUR );
                              continue;
                          }

                          Wikimedia\suppressWarnings();
                          $content = gzuncompress( $content );
                          Wikimedia\restoreWarnings();

                          if ( $content === false ) {
                              // decompression failed
                              wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
                              fseek( $fh, self::$crcSize, SEEK_CUR );
                              continue;
                          }

                          Wikimedia\suppressWarnings();
                          $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
                          Wikimedia\restoreWarnings();

                          if ( $content === false ) {
                              throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
                          }

                          $finalKeyword = self::$textChunks[$keyword];
                          $text[$finalKeyword]['x-default'] = $content;
                          $text[$finalKeyword]['_type'] = 'lang';
                      } else {
                          wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
                          fseek( $fh, $chunk_size, SEEK_CUR );
                      }
                  } elseif ( $chunk_type === 'tIME' ) {
                      // last mod timestamp.
                      if ( $chunk_size !== 7 ) {
                          throw new Exception( __METHOD__ . ": tIME wrong size" );
                      }
                      $buf = self::read( $fh, $chunk_size );
                      if ( !$buf || strlen( $buf ) < $chunk_size ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }

                      // Note: spec says this should be UTC.
                      $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
                      $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
                          $t['y'], $t['m'], $t['d'], $t['h'],
                          $t['min'], $t['s'] );

                      $exifTime = wfTimestamp( TS_EXIF, $strTime );

                      if ( $exifTime ) {
                          $text['DateTime'] = $exifTime;
                      }
                  } elseif ( $chunk_type === 'pHYs' ) {
                      // how big pixels are (dots per meter).
                      if ( $chunk_size !== 9 ) {
                          throw new Exception( __METHOD__ . ": pHYs wrong size" );
                      }

                      $buf = self::read( $fh, $chunk_size );
                      if ( !$buf || strlen( $buf ) < $chunk_size ) {
                          throw new Exception( __METHOD__ . ": Read error" );
                      }

                      $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
                      if ( $dim['unit'] == 1 ) {
                          // Need to check for negative because php
                          // doesn't deal with super-large unsigned 32-bit ints well
                          if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
                              // unit is meters
                              // (as opposed to 0 = undefined )
                              $text['XResolution'] = $dim['width']
                                  . '/100';
                              $text['YResolution'] = $dim['height']
                                  . '/100';
                              $text['ResolutionUnit'] = 3;
                              // 3 = dots per cm (from Exif).
                          }
                      }
                  } elseif ( $chunk_type === "IEND" ) {
                      break;
                  } else {
                      // Chunk we do not interpret: jump over its payload.
                      fseek( $fh, $chunk_size, SEEK_CUR );
                  }
                  // Skip the CRC that trails the chunk just handled.
                  fseek( $fh, self::$crcSize, SEEK_CUR );
              }
          } finally {
              fclose( $fh );
          }

          if ( $loopCount > 1 ) {
              $duration *= $loopCount;
          }

          if ( isset( $text['DateTimeDigitized'] ) ) {
              // Convert date format from rfc2822 to exif.
              foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
                  if ( $name === '_type' ) {
                      continue;
                  }

                  // @todo FIXME: Currently timezones are ignored.
                  // possibly should be wfTimestamp's
                  // responsibility. (at least for numeric TZ)
                  $formatted = wfTimestamp( TS_EXIF, $value );
                  if ( $formatted ) {
                      // Only change if we could convert the
                      // date.
                      // The png standard says it should be
                      // in rfc2822 format, but not required.
                      // In general for the exif stuff we
                      // prettify the date if we can, but we
                      // display as-is if we cannot or if
                      // it is invalid.
                      // So do the same here.

                      $value = $formatted;
                  }
              }
              // Break the reference so later writes to $value cannot touch $text.
              unset( $value );
          }

          return [
              'frameCount' => $frameCount,
              'loopCount' => $loopCount,
              'duration' => $duration,
              'text' => $text,
              'bitDepth' => $bitDepth,
              'colorType' => $colorType,
          ];
      }

      /**
       * Read a chunk, checking to make sure its not too big.
       *
       * @param resource $fh The file handle
       * @param int $size Size in bytes.
       * @throws Exception If too big
       * @return string|bool The chunk; an empty string when $size is less than 1,
       *   otherwise whatever fread() returns for the requested size.
       */
      private static function read( $fh, $size ) {
          if ( $size > self::MAX_CHUNK_SIZE ) {
              throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
                  ' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
          }

          if ( $size < 1 ) {
              // fread() does not accept a length below 1 (PHP 8 raises a
              // ValueError for it), so hand back an empty payload and let the
              // caller's own validation reject the chunk with an Exception.
              return '';
          }

          return fread( $fh, $size );
      }
  }