GIFMetadataExtractor.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. <?php
  2. /**
  3. * GIF frame counter.
  4. *
  5. * Originally written in Perl by Steve Sanbeg.
  6. * Ported to PHP by Andrew Garrett
  7. * Deliberately not using MWExceptions to avoid external dependencies, encouraging
  8. * redistribution.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License along
  21. * with this program; if not, write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  23. * http://www.gnu.org/copyleft/gpl.html
  24. *
  25. * @file
  26. * @ingroup Media
  27. */
  /**
   * GIF frame counter and metadata extractor.
   *
   * Walks the block structure of a GIF87a/GIF89a file and reports the number
   * of frames, the summed frame delay, whether the animation loops, the raw
   * XMP packet (if any) and the texts of the comment extension blocks.
   *
   * @ingroup Media
   */
  class GIFMetadataExtractor {
      /** @var string Image descriptor introducer, "," (0x2C) */
      private static $gifFrameSep;

      /** @var string Extension introducer, "!" (0x21) */
      private static $gifExtensionSep;

      /** @var string Trailer byte, ";" (0x3B) */
      private static $gifTerm;

      const VERSION = 1;

      // Each sub-block is less than or equal to 255 bytes.
      // Most of the time it's 255 bytes, except for in XMP
      // blocks, where it's usually between 32-127 bytes each.
      const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.

      /**
       * Parse a GIF file and collect its metadata.
       *
       * The returned array has the keys:
       *  - 'frameCount' (int): number of image descriptors found
       *  - 'looped' (bool): true if a NETSCAPE2.0 block has a loop count other than 1
       *  - 'duration' (float): sum of all graphics control delays, in seconds
       *  - 'xmp' (string): XMP packet with the magic trailer stripped, or ""
       *  - 'comment' (array): comment block texts, consecutive duplicates removed
       *
       * @throws Exception If the file is missing, unreadable, not a GIF, or malformed
       * @param string $filename
       * @return array
       */
      public static function getMetadata( $filename ) {
          self::$gifFrameSep = ','; // 2C
          self::$gifExtensionSep = '!'; // 21
          self::$gifTerm = ';'; // 3B

          if ( !$filename ) {
              throw new Exception( "No file name specified" );
          } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
              throw new Exception( "File $filename does not exist" );
          }

          $fh = fopen( $filename, 'rb' );
          if ( !$fh ) {
              throw new Exception( "Unable to open file $filename" );
          }

          // Everything below may throw; the finally clause makes sure the
          // handle is released on every exit path.
          try {
              $frameCount = 0;
              $duration = 0.0;
              $isLooped = false;
              $xmp = "";
              $comment = [];

              // Check for the GIF header
              $buf = fread( $fh, 6 );
              if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
                  throw new Exception( "Not a valid GIF file; header: $buf" );
              }

              // Skip the logical screen width and height (2 bytes each);
              // neither value is needed here.
              fread( $fh, 4 );

              // Read BPP
              $bpp = self::decodeBPP( fread( $fh, 1 ) );

              // Skip over background and aspect ratio
              fread( $fh, 2 );

              // Skip over the GCT
              self::readGCT( $fh, $bpp );

              while ( !feof( $fh ) ) {
                  $buf = fread( $fh, 1 );

                  if ( $buf === self::$gifTerm ) {
                      break;
                  }

                  if ( $buf === self::$gifFrameSep ) {
                      // Found a frame
                      $frameCount++;

                      // Skip bounding box
                      fread( $fh, 8 );

                      // Read BPP of the frame's own colour table
                      $bpp = self::decodeBPP( fread( $fh, 1 ) );

                      // Skip that colour table, if present
                      self::readGCT( $fh, $bpp );

                      // One more byte precedes the image data sub-blocks
                      fread( $fh, 1 );
                      self::skipBlock( $fh );
                      continue;
                  }

                  if ( $buf !== self::$gifExtensionSep ) {
                      if ( strlen( $buf ) < 1 ) {
                          throw new Exception( "Ran out of input" );
                      }
                      $byte = unpack( 'C', $buf )[1];
                      throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
                  }

                  // Extension block: the next byte says which kind.
                  $buf = fread( $fh, 1 );
                  if ( strlen( $buf ) < 1 ) {
                      throw new Exception( "Ran out of input" );
                  }
                  $extensionCode = unpack( 'C', $buf )[1];

                  if ( $extensionCode === 0xF9 ) {
                      // Graphics Control Extension.
                      fread( $fh, 1 ); // Block size

                      fread( $fh, 1 ); // Transparency, disposal method, user input

                      $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
                      if ( strlen( $buf ) < 2 ) {
                          throw new Exception( "Ran out of input" );
                      }
                      $delay = unpack( 'v', $buf )[1];
                      $duration += $delay * 0.01;

                      fread( $fh, 1 ); // Transparent colour index

                      $term = fread( $fh, 1 ); // Should be a terminator
                      if ( strlen( $term ) < 1 ) {
                          throw new Exception( "Ran out of input" );
                      }
                      if ( unpack( 'C', $term )[1] != 0 ) {
                          throw new Exception( "Malformed Graphics Control Extension block" );
                      }
                  } elseif ( $extensionCode === 0xFE ) {
                      // Comment block(s).
                      $data = self::readBlock( $fh );
                      if ( $data === "" ) {
                          throw new Exception( 'Read error, zero-length comment block' );
                      }

                      // The standard says this should be ASCII, however it's unclear if
                      // that's true in practice. Check to see if it's valid UTF-8; if so
                      // assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
                      $dataCopy = $data;
                      // quickIsNFCVerify has the side effect of replacing any invalid characters
                      UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

                      if ( $dataCopy !== $data ) {
                          Wikimedia\suppressWarnings();
                          $data = iconv( 'windows-1252', 'UTF-8', $data );
                          Wikimedia\restoreWarnings();
                      }

                      $commentCount = count( $comment );
                      if ( $commentCount === 0
                          || $comment[$commentCount - 1] !== $data
                      ) {
                          // Some applications repeat the same comment on each
                          // frame of an animated GIF image, so if this comment
                          // is identical to the last, only extract once.
                          $comment[] = $data;
                      }
                  } elseif ( $extensionCode === 0xFF ) {
                      // Application extension (Netscape info about the animated gif)
                      // or XMP (or theoretically any other type of extension block)
                      $blockLength = fread( $fh, 1 );
                      if ( strlen( $blockLength ) < 1 ) {
                          throw new Exception( "Ran out of input" );
                      }
                      $blockLength = unpack( 'C', $blockLength )[1];

                      if ( $blockLength !== 11 ) {
                          wfDebug( __METHOD__ . " GIF application block with wrong length\n" );
                          // Nothing past the length byte has been consumed yet (a
                          // zero length must never be handed to fread()), so step
                          // back onto the length byte and skip the sub-block chain.
                          fseek( $fh, -1, SEEK_CUR );
                          self::skipBlock( $fh );
                          continue;
                      }

                      $data = fread( $fh, 11 );

                      // NETSCAPE2.0 (application name for animated gif)
                      if ( $data === 'NETSCAPE2.0' ) {
                          $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

                          if ( $data !== "\x03\x01" ) {
                              throw new Exception( "Expected \x03\x01, got $data" );
                          }

                          // Unsigned little-endian integer, loop count or zero for "forever"
                          $loopData = fread( $fh, 2 );
                          if ( strlen( $loopData ) < 2 ) {
                              throw new Exception( "Ran out of input" );
                          }
                          $loopCount = unpack( 'v', $loopData )[1];

                          if ( $loopCount != 1 ) {
                              $isLooped = true;
                          }

                          // Read out terminator byte
                          fread( $fh, 1 );
                      } elseif ( $data === 'XMP DataXMP' ) {
                          // application name for XMP data.
                          // see pg 18 of XMP spec part 3.

                          $xmp = self::readBlock( $fh, true );

                          if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
                              || substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
                          ) {
                              // this is just a sanity check.
                              throw new Exception( "XMP does not have magic trailer!" );
                          }

                          // strip out trailer.
                          $xmp = substr( $xmp, 0, -257 );
                      } else {
                          // unrecognized extension block: rewind over exactly what was
                          // read (identifier plus length byte) and skip the whole chain.
                          fseek( $fh, -( strlen( $data ) + 1 ), SEEK_CUR );
                          self::skipBlock( $fh );
                          continue;
                      }
                  } else {
                      // Any other extension type is skipped wholesale.
                      self::skipBlock( $fh );
                  }
              }

              return [
                  'frameCount' => $frameCount,
                  'looped' => $isLooped,
                  'duration' => $duration,
                  'xmp' => $xmp,
                  'comment' => $comment,
              ];
          } finally {
              fclose( $fh );
          }
      }

      /**
       * Skip over a colour table of 2^$bpp entries of 3 bytes each.
       *
       * Nothing is read when $bpp is 0 or less (no table present).
       *
       * @param resource $fh
       * @param int $bpp
       * @return void
       */
      public static function readGCT( $fh, $bpp ) {
          if ( $bpp > 0 ) {
              // The contents are not needed; consume the table in one read.
              fread( $fh, 3 * ( 2 ** $bpp ) );
          }
      }

      /**
       * Decode a packed descriptor byte into a colour table size exponent.
       *
       * Bit 7 flags whether a colour table follows; the low three bits hold
       * the table size exponent minus one.
       *
       * @param string $data The packed byte (only the first byte is used)
       * @throws Exception If $data is empty
       * @return int Bits per pixel (1-8), or 0 when no colour table is present
       */
      public static function decodeBPP( $data ) {
          if ( strlen( $data ) < 1 ) {
              throw new Exception( "Ran out of input" );
          }

          $packed = unpack( 'C', $data )[1];
          if ( ( $packed & 0x80 ) === 0 ) {
              return 0;
          }

          return ( $packed & 0x07 ) + 1;
      }

      /**
       * Skip a chain of sub-blocks up to and including its zero-length
       * terminator.
       *
       * @param resource $fh
       * @throws Exception If a sub-block length byte cannot be read
       */
      public static function skipBlock( $fh ) {
          while ( !feof( $fh ) ) {
              $buf = fread( $fh, 1 );
              if ( strlen( $buf ) < 1 ) {
                  throw new Exception( "Ran out of input" );
              }

              $blockLength = unpack( 'C', $buf )[1];
              if ( $blockLength == 0 ) {
                  return;
              }

              fread( $fh, $blockLength );
          }
      }

      /**
       * Read a block. In the GIF format, a block is made up of
       * several sub-blocks. Each sub block starts with one byte
       * saying how long the sub-block is, followed by the sub-block.
       * The entire block is terminated by a sub-block of length
       * 0.
       * @param resource $fh File handle
       * @param bool $includeLengths Include the length bytes of the
       *  sub-blocks in the returned value. Normally this is false,
       *  except XMP is weird and does a hack where you need to keep
       *  these length bytes.
       * @throws Exception If the input ends before the terminator or the
       *  block has more than MAX_SUBBLOCKS sub-blocks
       * @return string The data.
       */
      public static function readBlock( $fh, $includeLengths = false ) {
          $data = '';
          $blocks = 0;
          $subLength = fread( $fh, 1 );

          while ( $subLength !== "\0" ) {
              $blocks++;
              if ( $blocks > self::MAX_SUBBLOCKS ) {
                  throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
              }

              // A failed or empty length read is treated like EOF so that a zero
              // length is never passed on to fread() below.
              if ( feof( $fh ) || strlen( $subLength ) < 1 ) {
                  throw new Exception( "Read error: Unexpected EOF." );
              }

              if ( $includeLengths ) {
                  $data .= $subLength;
              }

              $data .= fread( $fh, ord( $subLength ) );
              $subLength = fread( $fh, 1 );
          }

          return $data;
      }
  }