DjVuImage.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. <?php
  2. /**
  3. *
  4. * Copyright (C) 2006 Brion Vibber <brion@pobox.com>
  5. * http://www.mediawiki.org/
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. * http://www.gnu.org/copyleft/gpl.html
  21. *
  22. */
  /**
   * Support for detecting/validating DjVu image files and getting
   * some basic file metadata (resolution etc).
   *
   * File format docs are available in the source package for DjVuLibre:
   * http://djvulibre.djvuzone.org/
   *
   * @ingroup Media
   */
  class DjVuImage {
      /**
       * Path of the DjVu file this object inspects.
       *
       * Declared explicitly instead of being created on the fly by the
       * constructor; kept public so code that already reads it keeps working.
       */
      public $mFilename;

      /**
       * @param string $filename Path of the file to inspect
       */
      public function __construct( $filename ) {
          $this->mFilename = $filename;
      }

      /**
       * Check if the given file is indeed a valid DjVu image file
       * @return bool True when getInfo() could read page information
       */
      public function isValid() {
          return $this->getInfo() !== false;
      }

      /**
       * Return data in the style of getimagesize()
       * @return array|false array( width, height, 'DjVu', html size attributes ),
       *   or false on failure
       */
      public function getImageSize() {
          $data = $this->getInfo();
          if ( $data === false ) {
              return false;
          }

          $width = $data['width'];
          $height = $data['height'];
          return array( $width, $height, 'DjVu',
              "width=\"$width\" height=\"$height\"" );
      }

      // ---------

      /**
       * For debugging; dump the IFF chunk structure to standard output.
       *
       * Prints nothing when the file cannot be opened (fopen() itself reports
       * the failure) or when it is shorter than the 12-byte outer header.
       */
      public function dump() {
          $file = fopen( $this->mFilename, 'rb' );
          if ( $file === false ) {
              return;
          }

          $header = fread( $file, 12 );
          if ( strlen( $header ) === 12 ) {
              $fields = unpack( 'a4magic/a4chunk/NchunkLength', $header );
              echo "{$fields['chunk']} {$fields['chunkLength']}\n";
              $this->dumpForm( $file, $fields['chunkLength'], 1 );
          }
          fclose( $file );
      }

      /**
       * Print the chunks contained in one FORM, recursing into nested FORMs.
       *
       * @param resource $file Open file handle positioned at the FORM's 4-byte type
       * @param int $length Length of the FORM as stored in its chunk header
       * @param int $indent Nesting depth; each level indents the output by 4 spaces
       */
      private function dumpForm( $file, $length, $indent ) {
          $start = ftell( $file );
          $secondary = fread( $file, 4 );
          $prefix = str_repeat( ' ', $indent * 4 );
          echo $prefix . "($secondary)\n";

          while ( ftell( $file ) - $start < $length ) {
              $chunkHeader = fread( $file, 8 );
              if ( strlen( $chunkHeader ) < 8 ) {
                  // End of file, or a truncated header that cannot be unpacked
                  break;
              }
              $fields = unpack( 'a4chunk/NchunkLength', $chunkHeader );
              $chunk = $fields['chunk'];
              $chunkLength = $fields['chunkLength'];

              echo $prefix . "$chunk $chunkLength\n";
              if ( $chunk === 'FORM' ) {
                  $this->dumpForm( $file, $chunkLength, $indent + 1 );
              } else {
                  $this->skipChunk( $file, $chunkLength );
              }
          }
      }

      /**
       * Read the file header and return the information of the (first) page.
       *
       * @return array|false Array with the keys 'width', 'height', 'version',
       *   'resolution' and 'gamma', or false if the file cannot be opened or is
       *   not a recognised DjVu file
       */
      public function getInfo() {
          wfSuppressWarnings();
          $file = fopen( $this->mFilename, 'rb' );
          wfRestoreWarnings();
          if ( $file === false ) {
              wfDebug( __METHOD__ . ": missing or failed file read\n" );
              return false;
          }

          $header = fread( $file, 16 );
          $info = false;

          if ( strlen( $header ) < 16 ) {
              wfDebug( __METHOD__ . ": too short file header\n" );
          } else {
              $fields = unpack( 'a4magic/a4form/NformLength/a4subtype', $header );
              $subtype = $fields['subtype'];
              $formLength = $fields['formLength'];

              if ( $fields['magic'] !== 'AT&T' ) {
                  wfDebug( __METHOD__ . ": not a DjVu file\n" );
              } elseif ( $subtype === 'DJVU' ) {
                  // Single-page document
                  $info = $this->getPageInfo( $file, $formLength );
              } elseif ( $subtype === 'DJVM' ) {
                  // Multi-page document
                  $info = $this->getMultiPageInfo( $file, $formLength );
              } else {
                  // Report the subtype that was actually read from the header
                  wfDebug( __METHOD__ . ": unrecognized DJVU file type '$subtype'\n" );
              }
          }

          fclose( $file );
          return $info;
      }

      /**
       * Read an 8-byte chunk header (4-byte ID plus 32-bit big-endian length).
       *
       * @param resource $file Open file handle
       * @return array array( chunk ID, length ), or array( false, 0 ) when fewer
       *   than 8 bytes could be read
       */
      private function readChunk( $file ) {
          $header = fread( $file, 8 );
          if ( strlen( $header ) < 8 ) {
              return array( false, 0 );
          }

          $fields = unpack( 'a4chunk/Nlength', $header );
          return array( $fields['chunk'], $fields['length'] );
      }

      /**
       * Move the file position past a chunk body and its padding byte, if any.
       *
       * @param resource $file Open file handle positioned at the chunk body
       * @param int $chunkLength Number of body bytes to skip
       */
      private function skipChunk( $file, $chunkLength ) {
          fseek( $file, $chunkLength, SEEK_CUR );
          // Parenthesised on purpose: `==` binds tighter than `&` in PHP.
          if ( ( $chunkLength & 1 ) === 1 && !feof( $file ) ) {
              // padding byte
              fseek( $file, 1, SEEK_CUR );
          }
      }

      /**
       * Locate the first page of a multi-page document and return its info.
       *
       * For now, we just look for the first page in the file and report its
       * information, hoping the other pages are the same size.
       *
       * @param resource $file Open file handle positioned after the 16-byte header
       * @param int $formLength Length of the outer FORM as stored in the header
       * @return array|false Page info as returned by getPageInfo(), or false when
       *   no page was found
       */
      private function getMultiPageInfo( $file, $formLength ) {
          $start = ftell( $file );
          do {
              list( $chunk, $length ) = $this->readChunk( $file );
              if ( !$chunk ) {
                  break;
              }

              if ( $chunk === 'FORM' ) {
                  $subtype = fread( $file, 4 );
                  if ( $subtype === 'DJVU' ) {
                      wfDebug( __METHOD__ . ": found first subpage\n" );
                      return $this->getPageInfo( $file, $length );
                  }
                  // The 4-byte subtype has already been consumed
                  $this->skipChunk( $file, $length - 4 );
              } else {
                  wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" );
                  $this->skipChunk( $file, $length );
              }
          } while ( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength );

          wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" );
          return false;
      }

      /**
       * Parse the INFO chunk expected at the current file position.
       *
       * @param resource $file Open file handle positioned at the INFO chunk header
       * @param int $formLength Length of the enclosing FORM (currently unused)
       * @return array|false Array with the keys 'width', 'height', 'version',
       *   'resolution' and 'gamma', or false if the chunk is missing or cut off
       */
      private function getPageInfo( $file, $formLength ) {
          list( $chunk, $length ) = $this->readChunk( $file );
          if ( $chunk !== 'INFO' ) {
              wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" );
              return false;
          }

          if ( $length < 9 ) {
              wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" );
              return false;
          }

          $data = fread( $file, $length );
          if ( strlen( $data ) < $length ) {
              wfDebug( __METHOD__ . ": INFO chunk cut off\n" );
              return false;
          }

          $fields = unpack(
              'nwidth/' .
              'nheight/' .
              'Cminor/' .
              'Cmajor/' .
              'vresolution/' .
              'Cgamma', $data );

          # Newer files have rotation info in byte 10, but we don't use it yet.
          return array(
              'width' => $fields['width'],
              'height' => $fields['height'],
              'version' => "{$fields['major']}.{$fields['minor']}",
              'resolution' => $fields['resolution'],
              'gamma' => $fields['gamma'] / 10.0 );
      }

      /**
       * Return an XML string describing the DjVu image
       *
       * Runs djvudump when $wgDjvuDump is set, otherwise djvutoxml when
       * $wgDjvuToXML is set.
       *
       * @return string|false|null XML text; false when the djvudump output could
       *   not be converted; null when neither tool is configured
       */
      public function retrieveMetaData() {
          global $wgDjvuToXML, $wgDjvuDump;

          if ( isset( $wgDjvuDump ) ) {
              # djvudump is faster as of version 3.5
              # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
              wfProfileIn( 'djvudump' );
              $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
              $dump = wfShellExec( $cmd );
              $xml = $this->convertDumpToXML( $dump );
              wfProfileOut( 'djvudump' );
          } elseif ( isset( $wgDjvuToXML ) ) {
              wfProfileIn( 'djvutoxml' );
              $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
                  wfEscapeShellArg( $this->mFilename );
              $xml = wfShellExec( $cmd );
              wfProfileOut( 'djvutoxml' );
          } else {
              $xml = null;
          }
          return $xml;
      }

      /**
       * Hack to temporarily work around djvutoxml bug
       *
       * Builds a DjVuXML document from the text output of djvudump, with one
       * OBJECT element per page found.
       *
       * @param string $dump Output of djvudump
       * @return string|false XML text, or false if the dump is empty, describes an
       *   indirect multi-page document, or contains no usable page
       */
      public function convertDumpToXML( $dump ) {
          if ( strval( $dump ) == '' ) {
              return false;
          }

          // The heredoc text and its closing marker must stay at column 0.
          $xml = <<<EOT
<?xml version="1.0" ?>
<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
<DjVuXML>
<HEAD></HEAD>
<BODY>
EOT;

          $dump = str_replace( "\r", '', $dump );
          // strtok() keeps its position between calls; parseFormDjvu() continues
          // reading from the same tokenizer state.
          $line = strtok( $dump, "\n" );
          $m = array();
          $good = false;

          if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
              # Single-page
              if ( !$this->parseFormDjvu( $line, $xml ) ) {
                  return false;
              }
              $good = true;
          } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
              # Multi-page
              $parentLevel = strlen( $m[1] );
              # Find DIRM
              $line = strtok( "\n" );
              while ( $line !== false ) {
                  $childLevel = strspn( $line, ' ' );
                  if ( $childLevel <= $parentLevel ) {
                      # End of chunk
                      break;
                  }

                  if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
                      wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );
                      return false;
                  }
                  if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
                      # Found page
                      if ( !$this->parseFormDjvu( $line, $xml ) ) {
                          return false;
                      }
                      $good = true;
                  }
                  $line = strtok( "\n" );
              }
          }

          if ( !$good ) {
              return false;
          }

          $xml .= "</BODY>\n</DjVuXML>\n";
          return $xml;
      }

      /**
       * Append an OBJECT element for one FORM:DJVU section of a djvudump listing.
       *
       * Continues reading lines from the strtok() state set up by
       * convertDumpToXML() until the INFO line of the section is found or the
       * section ends.
       *
       * @param string $line The FORM:DJVU line that starts the section
       * @param string &$xml XML text to append the OBJECT element to
       * @return bool True if an INFO line was found and appended, false otherwise
       */
      public function parseFormDjvu( $line, &$xml ) {
          $parentLevel = strspn( $line, ' ' );
          $line = strtok( "\n" );

          # Find INFO
          while ( $line !== false ) {
              $childLevel = strspn( $line, ' ' );
              if ( $childLevel <= $parentLevel ) {
                  # End of chunk
                  break;
              }

              if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) {
                  $xml .= Xml::tags( 'OBJECT',
                      array(
                          #'data' => '',
                          #'type' => 'image/x.djvu',
                          'height' => $m[2],
                          'width' => $m[1],
                          #'usemap' => '',
                      ),
                      "\n" .
                      Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
                      Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
                  ) . "\n";
                  return true;
              }
              $line = strtok( "\n" );
          }

          # Not found
          return false;
      }
  }