123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- <?php
- /**
- * PNG frame counter and metadata extractor.
- *
- * Slightly derived from GIFMetadataExtractor.php
- * Deliberately not using MWExceptions to avoid external dependencies, encouraging
- * redistribution.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Media
- */
- /**
- * PNG frame counter.
- *
- * @ingroup Media
- */
- class PNGMetadataExtractor {
- /** @var string */
- private static $pngSig;
- /** @var int */
- private static $crcSize;
- /** @var array */
- private static $textChunks;
- const VERSION = 1;
- const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
- static function getMetadata( $filename ) {
- self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
- self::$crcSize = 4;
- /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
- * and https://www.w3.org/TR/PNG/#11keywords
- */
- self::$textChunks = [
- 'xml:com.adobe.xmp' => 'xmp',
- # Artist is unofficial. Author is the recommended
- # keyword in the PNG spec. However some people output
- # Artist so support both.
- 'artist' => 'Artist',
- 'model' => 'Model',
- 'make' => 'Make',
- 'author' => 'Artist',
- 'comment' => 'PNGFileComment',
- 'description' => 'ImageDescription',
- 'title' => 'ObjectName',
- 'copyright' => 'Copyright',
- # Source as in original device used to make image
- # not as in who gave you the image
- 'source' => 'Model',
- 'software' => 'Software',
- 'disclaimer' => 'Disclaimer',
- 'warning' => 'ContentWarning',
- 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
- 'label' => 'Label',
- 'creation time' => 'DateTimeDigitized',
- /* Other potentially useful things - Document */
- ];
- $frameCount = 0;
- $loopCount = 1;
- $text = [];
- $duration = 0.0;
- $bitDepth = 0;
- $colorType = 'unknown';
- if ( !$filename ) {
- throw new Exception( __METHOD__ . ": No file name specified" );
- } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
- throw new Exception( __METHOD__ . ": File $filename does not exist" );
- }
- $fh = fopen( $filename, 'rb' );
- if ( !$fh ) {
- throw new Exception( __METHOD__ . ": Unable to open file $filename" );
- }
- // Check for the PNG header
- $buf = fread( $fh, 8 );
- if ( $buf != self::$pngSig ) {
- throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
- }
- // Read chunks
- while ( !feof( $fh ) ) {
- $buf = fread( $fh, 4 );
- if ( !$buf || strlen( $buf ) < 4 ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $chunk_size = unpack( "N", $buf )[1];
- if ( $chunk_size < 0 ) {
- throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
- }
- $chunk_type = fread( $fh, 4 );
- if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- if ( $chunk_type == "IHDR" ) {
- $buf = self::read( $fh, $chunk_size );
- if ( !$buf || strlen( $buf ) < $chunk_size ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
- $height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
- $bitDepth = ord( substr( $buf, 8, 1 ) );
- // Detect the color type in British English as per the spec
- // https://www.w3.org/TR/PNG/#11IHDR
- switch ( ord( substr( $buf, 9, 1 ) ) ) {
- case 0:
- $colorType = 'greyscale';
- break;
- case 2:
- $colorType = 'truecolour';
- break;
- case 3:
- $colorType = 'index-coloured';
- break;
- case 4:
- $colorType = 'greyscale-alpha';
- break;
- case 6:
- $colorType = 'truecolour-alpha';
- break;
- default:
- $colorType = 'unknown';
- break;
- }
- } elseif ( $chunk_type == "acTL" ) {
- $buf = fread( $fh, $chunk_size );
- if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 4 ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $actl = unpack( "Nframes/Nplays", $buf );
- $frameCount = $actl['frames'];
- $loopCount = $actl['plays'];
- } elseif ( $chunk_type == "fcTL" ) {
- $buf = self::read( $fh, $chunk_size );
- if ( !$buf || strlen( $buf ) < $chunk_size ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $buf = substr( $buf, 20 );
- if ( strlen( $buf ) < 4 ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
- if ( $fctldur['delay_den'] == 0 ) {
- $fctldur['delay_den'] = 100;
- }
- if ( $fctldur['delay_num'] ) {
- $duration += $fctldur['delay_num'] / $fctldur['delay_den'];
- }
- } elseif ( $chunk_type == "iTXt" ) {
- // Extracts iTXt chunks, uncompressing if necessary.
- $buf = self::read( $fh, $chunk_size );
- $items = [];
- if ( preg_match(
- '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
- $buf, $items )
- ) {
- /* $items[1] = text chunk name, $items[2] = compressed flag,
- * $items[3] = lang code (or ""), $items[4]= compression type.
- * $items[5] = content
- */
- // Theoretically should be case-sensitive, but in practise...
- $items[1] = strtolower( $items[1] );
- if ( !isset( self::$textChunks[$items[1]] ) ) {
- // Only extract textual chunks on our list.
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- $items[3] = strtolower( $items[3] );
- if ( $items[3] == '' ) {
- // if no lang specified use x-default like in xmp.
- $items[3] = 'x-default';
- }
- // if compressed
- if ( $items[2] == "\x01" ) {
- if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
- Wikimedia\suppressWarnings();
- $items[5] = gzuncompress( $items[5] );
- Wikimedia\restoreWarnings();
- if ( $items[5] === false ) {
- // decompression failed
- wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- } else {
- wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
- . " or potentially invalid compression method\n" );
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- }
- $finalKeyword = self::$textChunks[$items[1]];
- $text[$finalKeyword][$items[3]] = $items[5];
- $text[$finalKeyword]['_type'] = 'lang';
- } else {
- // Error reading iTXt chunk
- throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
- }
- } elseif ( $chunk_type == 'tEXt' ) {
- $buf = self::read( $fh, $chunk_size );
- // In case there is no \x00 which will make explode fail.
- if ( strpos( $buf, "\x00" ) === false ) {
- throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
- }
- list( $keyword, $content ) = explode( "\x00", $buf, 2 );
- if ( $keyword === '' || $content === '' ) {
- throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
- }
- // Theoretically should be case-sensitive, but in practise...
- $keyword = strtolower( $keyword );
- if ( !isset( self::$textChunks[$keyword] ) ) {
- // Don't recognize chunk, so skip.
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- Wikimedia\suppressWarnings();
- $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
- Wikimedia\restoreWarnings();
- if ( $content === false ) {
- throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
- }
- $finalKeyword = self::$textChunks[$keyword];
- $text[$finalKeyword]['x-default'] = $content;
- $text[$finalKeyword]['_type'] = 'lang';
- } elseif ( $chunk_type == 'zTXt' ) {
- if ( function_exists( 'gzuncompress' ) ) {
- $buf = self::read( $fh, $chunk_size );
- // In case there is no \x00 which will make explode fail.
- if ( strpos( $buf, "\x00" ) === false ) {
- throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
- }
- list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
- if ( $keyword === '' || $postKeyword === '' ) {
- throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
- }
- // Theoretically should be case-sensitive, but in practise...
- $keyword = strtolower( $keyword );
- if ( !isset( self::$textChunks[$keyword] ) ) {
- // Don't recognize chunk, so skip.
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- $compression = substr( $postKeyword, 0, 1 );
- $content = substr( $postKeyword, 1 );
- if ( $compression !== "\x00" ) {
- wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- Wikimedia\suppressWarnings();
- $content = gzuncompress( $content );
- Wikimedia\restoreWarnings();
- if ( $content === false ) {
- // decompression failed
- wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
- fseek( $fh, self::$crcSize, SEEK_CUR );
- continue;
- }
- Wikimedia\suppressWarnings();
- $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
- Wikimedia\restoreWarnings();
- if ( $content === false ) {
- throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
- }
- $finalKeyword = self::$textChunks[$keyword];
- $text[$finalKeyword]['x-default'] = $content;
- $text[$finalKeyword]['_type'] = 'lang';
- } else {
- wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
- fseek( $fh, $chunk_size, SEEK_CUR );
- }
- } elseif ( $chunk_type == 'tIME' ) {
- // last mod timestamp.
- if ( $chunk_size !== 7 ) {
- throw new Exception( __METHOD__ . ": tIME wrong size" );
- }
- $buf = self::read( $fh, $chunk_size );
- if ( !$buf || strlen( $buf ) < $chunk_size ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- // Note: spec says this should be UTC.
- $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
- $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
- $t['y'], $t['m'], $t['d'], $t['h'],
- $t['min'], $t['s'] );
- $exifTime = wfTimestamp( TS_EXIF, $strTime );
- if ( $exifTime ) {
- $text['DateTime'] = $exifTime;
- }
- } elseif ( $chunk_type == 'pHYs' ) {
- // how big pixels are (dots per meter).
- if ( $chunk_size !== 9 ) {
- throw new Exception( __METHOD__ . ": pHYs wrong size" );
- }
- $buf = self::read( $fh, $chunk_size );
- if ( !$buf || strlen( $buf ) < $chunk_size ) {
- throw new Exception( __METHOD__ . ": Read error" );
- }
- $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
- if ( $dim['unit'] == 1 ) {
- // Need to check for negative because php
- // doesn't deal with super-large unsigned 32-bit ints well
- if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
- // unit is meters
- // (as opposed to 0 = undefined )
- $text['XResolution'] = $dim['width']
- . '/100';
- $text['YResolution'] = $dim['height']
- . '/100';
- $text['ResolutionUnit'] = 3;
- // 3 = dots per cm (from Exif).
- }
- }
- } elseif ( $chunk_type == "IEND" ) {
- break;
- } else {
- fseek( $fh, $chunk_size, SEEK_CUR );
- }
- fseek( $fh, self::$crcSize, SEEK_CUR );
- }
- fclose( $fh );
- if ( $loopCount > 1 ) {
- $duration *= $loopCount;
- }
- if ( isset( $text['DateTimeDigitized'] ) ) {
- // Convert date format from rfc2822 to exif.
- foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
- if ( $name === '_type' ) {
- continue;
- }
- // @todo FIXME: Currently timezones are ignored.
- // possibly should be wfTimestamp's
- // responsibility. (at least for numeric TZ)
- $formatted = wfTimestamp( TS_EXIF, $value );
- if ( $formatted ) {
- // Only change if we could convert the
- // date.
- // The png standard says it should be
- // in rfc2822 format, but not required.
- // In general for the exif stuff we
- // prettify the date if we can, but we
- // display as-is if we cannot or if
- // it is invalid.
- // So do the same here.
- $value = $formatted;
- }
- }
- }
- return [
- 'frameCount' => $frameCount,
- 'loopCount' => $loopCount,
- 'duration' => $duration,
- 'text' => $text,
- 'bitDepth' => $bitDepth,
- 'colorType' => $colorType,
- ];
- }
- /**
- * Read a chunk, checking to make sure its not too big.
- *
- * @param resource $fh The file handle
- * @param int $size Size in bytes.
- * @throws Exception If too big
- * @return string The chunk.
- */
- private static function read( $fh, $size ) {
- if ( $size > self::MAX_CHUNK_SIZE ) {
- throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
- ' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
- }
- return fread( $fh, $size );
- }
- }
|