SVGReader.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. <?php
  2. /**
  3. * Extraction of SVG image metadata.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Media
  22. * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>"
  23. * @author Brion Vibber
  24. * @copyright Copyright © 2010-2010 Brion Vibber, Derk-Jan Hartman
  25. * @license GPL-2.0-or-later
  26. */
  /**
   * Streaming extractor for SVG image metadata, built on XMLReader.
   *
   * After construction, getMetadata() returns an array that may contain:
   *  - 'width' / 'height': integer pixel size (defaults 512x512)
   *  - 'originalWidth' / 'originalHeight': the raw attribute strings ('100%' if absent)
   *  - 'title', 'description': trimmed text of top-level <title> / <desc>
   *  - 'metadata': inner XML of a top-level <metadata> element
   *  - 'animated': true when a script or animation element was seen
   *  - 'translations': map of language code => LANG_FULL_MATCH | LANG_PREFIX_MATCH
   *
   * @ingroup Media
   */
  class SVGReader {
      /** Pixel width used when the file does not give a usable size */
      const DEFAULT_WIDTH = 512;
      /** Pixel height used when the file does not give a usable size */
      const DEFAULT_HEIGHT = 512;
      /** Namespace URI an element must have to be treated as SVG */
      const NS_SVG = 'http://www.w3.org/2000/svg';
      /** Language recorded only as the leading part of a longer tag (e.g. "en" from "en-GB") */
      const LANG_PREFIX_MATCH = 1;
      /** Language tag that appeared verbatim in a systemLanguage attribute */
      const LANG_FULL_MATCH = 2;

      /** @var null|XMLReader */
      private $reader = null;

      /** @var bool When true, debug() forwards its messages to wfDebug() */
      private $mDebug = false;

      /** @var array Collected metadata, see the class description for the keys */
      private $metadata = [];

      /** @var int[] Full language tag => self::LANG_FULL_MATCH */
      private $languages = [];

      /** @var int[] Language tag prefix => self::LANG_PREFIX_MATCH */
      private $languagePrefixes = [];

      /**
       * Creates an SVGReader drawing from the source provided
       *
       * Files larger than $wgSVGMetadataCutoff are read only up to that many
       * bytes, so the parsed document may be cut off mid-way.
       *
       * @param string $source URI from which to read
       * @throws MWException|Exception
       */
      public function __construct( $source ) {
          global $wgSVGMetadataCutoff;
          $this->reader = new XMLReader();

          // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
          $size = filesize( $source );
          if ( $size === false ) {
              throw new MWException( "Error getting filesize of SVG." );
          }

          $libxmlFlags = LIBXML_NOERROR | LIBXML_NOWARNING;
          if ( $size > $wgSVGMetadataCutoff ) {
              $this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
              $contents = file_get_contents( $source, false, null, 0, $wgSVGMetadataCutoff );
              if ( $contents === false ) {
                  throw new MWException( 'Error reading SVG file.' );
              }
              $opened = $this->reader->XML( $contents, null, $libxmlFlags );
          } else {
              $opened = $this->reader->open( $source, null, $libxmlFlags );
          }
          // Fail early with a clear message instead of reading from a reader
          // that never got any input attached.
          if ( !$opened ) {
              throw new MWException( 'Error reading SVG file.' );
          }

          // Expand entities, since Adobe Illustrator uses them for xmlns
          // attributes (T33719). Note that libxml2 has some protection
          // against large recursive entity expansions so this is not as
          // insecure as it might appear to be. However, it is still extremely
          // insecure. It's necessary to wrap any read() calls with
          // libxml_disable_entity_loader() to avoid arbitrary local file
          // inclusion, or even arbitrary code execution if the expect
          // extension is installed (T48859).
          // NOTE(review): libxml_disable_entity_loader() is deprecated as of
          // PHP 8.0 - revisit before raising the minimum PHP version.
          $oldDisable = libxml_disable_entity_loader( true );
          try {
              $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

              $this->metadata['width'] = self::DEFAULT_WIDTH;
              $this->metadata['height'] = self::DEFAULT_HEIGHT;

              // The size in the units specified by the SVG file
              // (for the metadata box)
              // Per the SVG spec, if unspecified, default to '100%'
              $this->metadata['originalWidth'] = '100%';
              $this->metadata['originalHeight'] = '100%';

              // Warnings are silenced because a truncated file is not valid XML.
              // The finally blocks guarantee that both the warning level and the
              // entity loader state are restored however read() exits, including
              // via an Error that a catch ( Exception ) would not see.
              Wikimedia\suppressWarnings();
              try {
                  $this->read();
              } finally {
                  Wikimedia\restoreWarnings();
              }
          } finally {
              libxml_disable_entity_loader( $oldDisable );
          }
      }

      /**
       * @return array Array with the known metadata
       */
      public function getMetadata() {
          return $this->metadata;
      }

      /**
       * Read the SVG
       *
       * Walks the direct children of the root <svg> element, filling
       * $this->metadata, then closes the reader.
       *
       * @throws MWException If the first element is not <svg> in the SVG namespace
       * @return bool Always true
       */
      protected function read() {
          // Skip until first element
          do {
              $keepReading = $this->reader->read();
          } while ( $keepReading && $this->reader->nodeType !== XMLReader::ELEMENT );

          if ( $this->reader->localName !== 'svg' || $this->reader->namespaceURI !== self::NS_SVG ) {
              throw new MWException( "Expected <svg> tag, got " .
                  $this->reader->localName . " in NS " . $this->reader->namespaceURI );
          }
          $this->debug( "<svg> tag is correct." );
          $this->handleSVGAttribs();

          $exitDepth = $this->reader->depth;
          $keepReading = $this->reader->read();
          while ( $keepReading ) {
              $tag = $this->reader->localName;
              $type = $this->reader->nodeType;
              $isSVG = ( $this->reader->namespaceURI === self::NS_SVG );

              $this->debug( $tag );

              if ( $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
                  && $this->reader->depth <= $exitDepth
              ) {
                  // Closing tag of the root element: nothing more to look at.
                  break;
              } elseif ( $isSVG && $tag === 'title' ) {
                  $this->readField( $tag, 'title' );
              } elseif ( $isSVG && $tag === 'desc' ) {
                  $this->readField( $tag, 'description' );
              } elseif ( $isSVG && $tag === 'metadata' && $type === XMLReader::ELEMENT ) {
                  $this->readXml( 'metadata' );
              } elseif ( $isSVG && $tag === 'script' ) {
                  // We normally do not allow scripted svgs.
                  // However its possible to configure MW to let them
                  // in, and such files should be considered animated.
                  $this->metadata['animated'] = true;
              } elseif ( $tag !== '#text' ) {
                  $this->debug( "Unhandled top-level XML tag $tag" );
                  // Recurse into children of current tag, looking for animation and languages.
                  $this->animateFilterAndLang( $tag );
              }

              // Goto next element, which is sibling of current (Skip children).
              $keepReading = $this->reader->next();
          }

          $this->reader->close();

          // Array union: a tag seen as a full match keeps LANG_FULL_MATCH even
          // if it was also recorded as a prefix.
          $this->metadata['translations'] = $this->languages + $this->languagePrefixes;

          return true;
      }

      /**
       * Read a textelement from an element
       *
       * Leaves the reader on the element's end tag. Every text node found
       * before that end tag overwrites the field, so the last one wins.
       *
       * @param string $name Name of the element that we are reading from
       * @param string|null $metafield Field that we will fill with the result
       */
      private function readField( $name, $metafield = null ) {
          $this->debug( "Read field $metafield" );
          if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
              return;
          }
          // A self-closing element has no end tag; scanning for one would
          // swallow the rest of the document and pick up unrelated text.
          if ( $this->reader->isEmptyElement ) {
              return;
          }

          $exitDepth = $this->reader->depth;
          $keepReading = $this->reader->read();
          while ( $keepReading ) {
              if ( $this->reader->localName === $name
                  && $this->reader->namespaceURI === self::NS_SVG
                  && $this->reader->nodeType === XMLReader::END_ELEMENT
                  && $this->reader->depth <= $exitDepth
              ) {
                  break;
              }
              if ( $this->reader->nodeType === XMLReader::TEXT ) {
                  $this->metadata[$metafield] = trim( $this->reader->value );
              }
              $keepReading = $this->reader->read();
          }
      }

      /**
       * Read an XML snippet from an element
       *
       * The reader is left on the element's start tag; the caller's next()
       * is what steps over the subtree. Advancing here as well would make
       * the caller skip the sibling that follows the element.
       *
       * @param string|null $metafield Field that we will fill with the result
       * @throws MWException
       */
      private function readXml( $metafield = null ) {
          $this->debug( "Read top level metadata" );
          if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
              return;
          }
          // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
          $this->metadata[$metafield] = trim( $this->reader->readInnerXml() );
      }

      /**
       * Filter all children, looking for animated elements.
       * Also get a list of languages that can be targeted.
       *
       * Only descendants of the current element are inspected, not the
       * element the reader is positioned on when this is called.
       *
       * @param string $name Name of the element that we are reading from
       */
      private function animateFilterAndLang( $name ) {
          // Element names that mark the file as animated. Normally files with
          // <script> are disallowed, but its possible to configure MW to
          // disable such checks.
          static $animatedTags = [
              'script',
              'animate',
              'set',
              'animateMotion',
              'animateColor',
              'animateTransform',
          ];

          $this->debug( "animate filter for tag $name" );
          if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
              return;
          }
          if ( $this->reader->isEmptyElement ) {
              return;
          }

          $exitDepth = $this->reader->depth;
          $keepReading = $this->reader->read();
          while ( $keepReading ) {
              if ( $this->reader->localName === $name && $this->reader->depth <= $exitDepth
                  && $this->reader->nodeType === XMLReader::END_ELEMENT
              ) {
                  break;
              }
              if ( $this->reader->namespaceURI === self::NS_SVG
                  && $this->reader->nodeType === XMLReader::ELEMENT
              ) {
                  $sysLang = $this->reader->getAttribute( 'systemLanguage' );
                  if ( $sysLang !== null && $sysLang !== '' ) {
                      $this->recordSystemLanguages( $sysLang );
                  }

                  if ( in_array( $this->reader->localName, $animatedTags, true ) ) {
                      $this->debug( "HOUSTON WE HAVE ANIMATION" );
                      $this->metadata['animated'] = true;
                  }
              }
              $keepReading = $this->reader->read();
          }
      }

      /**
       * Record the languages named by one systemLanguage attribute value.
       *
       * See https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
       *
       * @param string $sysLang Comma-separated list of language tags
       */
      private function recordSystemLanguages( $sysLang ) {
          foreach ( explode( ',', $sysLang ) as $langItem ) {
              $langItem = trim( $langItem );
              if ( Language::isWellFormedLanguageTag( $langItem ) ) {
                  $this->languages[$langItem] = self::LANG_FULL_MATCH;
              }
              // Note, the standard says that any prefix should work,
              // here we do only the initial prefix, since that will catch
              // 99% of cases, and we are going to compare against fallbacks.
              // This differs mildly from how the spec says languages should be
              // handled, however it matches better how the MediaWiki language
              // preference is generally handled.
              $dash = strpos( $langItem, '-' );
              // Intentionally checking both !false and > 0 at the same time.
              if ( $dash ) {
                  $itemPrefix = substr( $langItem, 0, $dash );
                  if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) {
                      $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
                  }
              }
          }
      }

      /**
       * Send a message to wfDebug() when debugging is switched on.
       *
       * @param string $data Message text
       */
      private function debug( $data ) {
          if ( $this->mDebug ) {
              wfDebug( "SVGReader: $data\n" );
          }
      }

      /**
       * Parse the attributes of an SVG element
       *
       * The parser has to be in the start element of "<svg>"
       *
       * Sets 'originalWidth' / 'originalHeight' from the raw attributes and,
       * when the resulting size is positive, 'width' / 'height' in pixels.
       * A missing dimension is derived from the other one using the viewBox
       * aspect ratio (1:1 when there is no usable viewBox).
       */
      private function handleSVGAttribs() {
          $defaultWidth = self::DEFAULT_WIDTH;
          $defaultHeight = self::DEFAULT_HEIGHT;
          $aspect = 1.0;
          $width = null;
          $height = null;

          $viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
          if ( $viewBoxAttr ) {
              // min-x min-y width height
              $viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
              if ( count( $viewBox ) == 4 ) {
                  $viewWidth = self::scaleSVGUnit( $viewBox[2] );
                  $viewHeight = self::scaleSVGUnit( $viewBox[3] );
                  if ( $viewWidth > 0 && $viewHeight > 0 ) {
                      $aspect = $viewWidth / $viewHeight;
                      $defaultHeight = $defaultWidth / $aspect;
                  }
              }
          }

          // The defaults double as the viewport size for percentage lengths.
          $widthAttr = $this->reader->getAttribute( 'width' );
          if ( $widthAttr ) {
              $width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
              $this->metadata['originalWidth'] = $widthAttr;
          }
          $heightAttr = $this->reader->getAttribute( 'height' );
          if ( $heightAttr ) {
              $height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
              $this->metadata['originalHeight'] = $heightAttr;
          }

          if ( $width === null && $height === null ) {
              $width = $defaultWidth;
              $height = $width / $aspect;
          } elseif ( $height === null ) {
              $height = $width / $aspect;
          } elseif ( $width === null ) {
              $width = $height * $aspect;
          }

          if ( $width > 0 && $height > 0 ) {
              $this->metadata['width'] = intval( round( $width ) );
              $this->metadata['height'] = intval( round( $height ) );
          }
      }

      /**
       * Return the pixel equivalent of a labeled CSS/SVG length.
       * https://www.w3.org/TR/SVG11/coords.html#Units
       *
       * The result is not rounded. Input that does not look like a number
       * with an optional known unit is passed through floatval().
       *
       * @param string $length CSS/SVG length.
       * @param float|int $viewportSize Optional scale for percentage units...
       * @return float Length in pixels
       */
      public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
          static $unitLength = [
              'px' => 1.0,
              'pt' => 1.25,
              'pc' => 15.0,
              'mm' => 3.543307,
              'cm' => 35.43307,
              'in' => 90.0,
              'em' => 16.0, // fake it?
              'ex' => 12.0, // fake it?
              '' => 1.0, // "User units" pixels by default
          ];

          $matches = [];
          $recognised = preg_match(
              '/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/',
              $length,
              $matches
          );
          if ( !$recognised ) {
              // Assume pixels
              return floatval( $length );
          }

          $number = floatval( $matches[1] );
          $unit = $matches[2];
          if ( $unit === '%' ) {
              return $number * 0.01 * $viewportSize;
          }

          return $number * $unitLength[$unit];
      }
  }