<?php
/**
 * Extraction and validation of image metadata.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @ingroup Media
 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber
 * @license GPL-2.0-or-later
 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
 * @file
 */
  27. /**
  28. * Class to extract and validate Exif data from jpeg (and possibly tiff) files.
  29. * @ingroup Media
  30. */
  31. class Exif {
  32. /** An 8-bit (1-byte) unsigned integer. */
  33. const BYTE = 1;
  34. /** An 8-bit byte containing one 7-bit ASCII code.
  35. * The final byte is terminated with NULL.
  36. */
  37. const ASCII = 2;
  38. /** A 16-bit (2-byte) unsigned integer. */
  39. const SHORT = 3;
  40. /** A 32-bit (4-byte) unsigned integer. */
  41. const LONG = 4;
  42. /** Two LONGs. The first LONG is the numerator and the second LONG expresses
  43. * the denominator
  44. */
  45. const RATIONAL = 5;
  46. /** A 16-bit (2-byte) or 32-bit (4-byte) unsigned integer. */
  47. const SHORT_OR_LONG = 6;
  48. /** An 8-bit byte that can take any value depending on the field definition */
  49. const UNDEFINED = 7;
  50. /** A 32-bit (4-byte) signed integer (2's complement notation), */
  51. const SLONG = 9;
  52. /** Two SLONGs. The first SLONG is the numerator and the second SLONG is
  53. * the denominator.
  54. */
  55. const SRATIONAL = 10;
  56. /** A fake value for things we don't want or don't support. */
  57. const IGNORE = -1;
  58. /** @var array Exif tags grouped by category, the tagname itself is the key
  59. * and the type is the value, in the case of more than one possible value
  60. * type they are separated by commas.
  61. */
  62. private $mExifTags;
  63. /** @var array The raw Exif data returned by exif_read_data() */
  64. private $mRawExifData;
  65. /** @var array A Filtered version of $mRawExifData that has been pruned
  66. * of invalid tags and tags that contain content they shouldn't contain
  67. * according to the Exif specification
  68. */
  69. private $mFilteredExifData;
  70. /** @var string The file being processed */
  71. private $file;
  72. /** @var string The basename of the file being processed */
  73. private $basename;
  74. /** @var string The private log to log to, e.g. 'exif' */
  75. private $log = false;
  76. /** @var string The byte order of the file. Needed because php's extension
  77. * doesn't fully process some obscure props.
  78. */
  79. private $byteOrder;
  80. /**
  81. * @param string $file Filename.
  82. * @param string $byteOrder Type of byte ordering either 'BE' (Big Endian)
  83. * or 'LE' (Little Endian). Default ''.
  84. * @throws MWException
  85. * @todo FIXME: The following are broke:
  86. * SubjectArea. Need to test the more obscure tags.
  87. * DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid.
  88. * Possibly should treat 0/0 = 0. need to read exif spec on that.
  89. */
  90. function __construct( $file, $byteOrder = '' ) {
  91. /**
  92. * Page numbers here refer to pages in the Exif 2.2 standard
  93. *
  94. * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes
  95. * so don't put a count parameter for any UNDEFINED values.
  96. *
  97. * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
  98. */
  99. $this->mExifTags = [
  100. # TIFF Rev. 6.0 Attribute Information (p22)
  101. 'IFD0' => [
  102. # Tags relating to image structure
  103. 'ImageWidth' => self::SHORT_OR_LONG, # Image width
  104. 'ImageLength' => self::SHORT_OR_LONG, # Image height
  105. 'BitsPerSample' => [ self::SHORT, 3 ], # Number of bits per component
  106. # "When a primary image is JPEG compressed, this designation is not"
  107. # "necessary and is omitted." (p23)
  108. 'Compression' => self::SHORT, # Compression scheme #p23
  109. 'PhotometricInterpretation' => self::SHORT, # Pixel composition #p23
  110. 'Orientation' => self::SHORT, # Orientation of image #p24
  111. 'SamplesPerPixel' => self::SHORT, # Number of components
  112. 'PlanarConfiguration' => self::SHORT, # Image data arrangement #p24
  113. 'YCbCrSubSampling' => [ self::SHORT, 2 ], # Subsampling ratio of Y to C #p24
  114. 'YCbCrPositioning' => self::SHORT, # Y and C positioning #p24-25
  115. 'XResolution' => self::RATIONAL, # Image resolution in width direction
  116. 'YResolution' => self::RATIONAL, # Image resolution in height direction
  117. 'ResolutionUnit' => self::SHORT, # Unit of X and Y resolution #(p26)
  118. # Tags relating to recording offset
  119. 'StripOffsets' => self::SHORT_OR_LONG, # Image data location
  120. 'RowsPerStrip' => self::SHORT_OR_LONG, # Number of rows per strip
  121. 'StripByteCounts' => self::SHORT_OR_LONG, # Bytes per compressed strip
  122. 'JPEGInterchangeFormat' => self::SHORT_OR_LONG, # Offset to JPEG SOI
  123. 'JPEGInterchangeFormatLength' => self::SHORT_OR_LONG, # Bytes of JPEG data
  124. # Tags relating to image data characteristics
  125. 'TransferFunction' => self::IGNORE, # Transfer function
  126. 'WhitePoint' => [ self::RATIONAL, 2 ], # White point chromaticity
  127. 'PrimaryChromaticities' => [ self::RATIONAL, 6 ], # Chromaticities of primarities
  128. # Color space transformation matrix coefficients #p27
  129. 'YCbCrCoefficients' => [ self::RATIONAL, 3 ],
  130. 'ReferenceBlackWhite' => [ self::RATIONAL, 6 ], # Pair of black and white reference values
  131. # Other tags
  132. 'DateTime' => self::ASCII, # File change date and time
  133. 'ImageDescription' => self::ASCII, # Image title
  134. 'Make' => self::ASCII, # Image input equipment manufacturer
  135. 'Model' => self::ASCII, # Image input equipment model
  136. 'Software' => self::ASCII, # Software used
  137. 'Artist' => self::ASCII, # Person who created the image
  138. 'Copyright' => self::ASCII, # Copyright holder
  139. ],
  140. # Exif IFD Attribute Information (p30-31)
  141. 'EXIF' => [
  142. # @todo NOTE: Nonexistence of this field is taken to mean nonconformance
  143. # to the Exif 2.1 AND 2.2 standards
  144. 'ExifVersion' => self::UNDEFINED, # Exif version
  145. 'FlashPixVersion' => self::UNDEFINED, # Supported Flashpix version #p32
  146. # Tags relating to Image Data Characteristics
  147. 'ColorSpace' => self::SHORT, # Color space information #p32
  148. # Tags relating to image configuration
  149. 'ComponentsConfiguration' => self::UNDEFINED, # Meaning of each component #p33
  150. 'CompressedBitsPerPixel' => self::RATIONAL, # Image compression mode
  151. 'PixelYDimension' => self::SHORT_OR_LONG, # Valid image height
  152. 'PixelXDimension' => self::SHORT_OR_LONG, # Valid image width
  153. # Tags relating to related user information
  154. 'MakerNote' => self::IGNORE, # Manufacturer notes
  155. 'UserComment' => self::UNDEFINED, # User comments #p34
  156. # Tags relating to related file information
  157. 'RelatedSoundFile' => self::ASCII, # Related audio file
  158. # Tags relating to date and time
  159. 'DateTimeOriginal' => self::ASCII, # Date and time of original data generation #p36
  160. 'DateTimeDigitized' => self::ASCII, # Date and time of original data generation
  161. 'SubSecTime' => self::ASCII, # DateTime subseconds
  162. 'SubSecTimeOriginal' => self::ASCII, # DateTimeOriginal subseconds
  163. 'SubSecTimeDigitized' => self::ASCII, # DateTimeDigitized subseconds
  164. # Tags relating to picture-taking conditions (p31)
  165. 'ExposureTime' => self::RATIONAL, # Exposure time
  166. 'FNumber' => self::RATIONAL, # F Number
  167. 'ExposureProgram' => self::SHORT, # Exposure Program #p38
  168. 'SpectralSensitivity' => self::ASCII, # Spectral sensitivity
  169. 'ISOSpeedRatings' => self::SHORT, # ISO speed rating
  170. 'OECF' => self::IGNORE,
  171. # Optoelectronic conversion factor. Note: We don't have support for this atm.
  172. 'ShutterSpeedValue' => self::SRATIONAL, # Shutter speed
  173. 'ApertureValue' => self::RATIONAL, # Aperture
  174. 'BrightnessValue' => self::SRATIONAL, # Brightness
  175. 'ExposureBiasValue' => self::SRATIONAL, # Exposure bias
  176. 'MaxApertureValue' => self::RATIONAL, # Maximum land aperture
  177. 'SubjectDistance' => self::RATIONAL, # Subject distance
  178. 'MeteringMode' => self::SHORT, # Metering mode #p40
  179. 'LightSource' => self::SHORT, # Light source #p40-41
  180. 'Flash' => self::SHORT, # Flash #p41-42
  181. 'FocalLength' => self::RATIONAL, # Lens focal length
  182. 'SubjectArea' => [ self::SHORT, 4 ], # Subject area
  183. 'FlashEnergy' => self::RATIONAL, # Flash energy
  184. 'SpatialFrequencyResponse' => self::IGNORE, # Spatial frequency response. Not supported atm.
  185. 'FocalPlaneXResolution' => self::RATIONAL, # Focal plane X resolution
  186. 'FocalPlaneYResolution' => self::RATIONAL, # Focal plane Y resolution
  187. 'FocalPlaneResolutionUnit' => self::SHORT, # Focal plane resolution unit #p46
  188. 'SubjectLocation' => [ self::SHORT, 2 ], # Subject location
  189. 'ExposureIndex' => self::RATIONAL, # Exposure index
  190. 'SensingMethod' => self::SHORT, # Sensing method #p46
  191. 'FileSource' => self::UNDEFINED, # File source #p47
  192. 'SceneType' => self::UNDEFINED, # Scene type #p47
  193. 'CFAPattern' => self::IGNORE, # CFA pattern. not supported atm.
  194. 'CustomRendered' => self::SHORT, # Custom image processing #p48
  195. 'ExposureMode' => self::SHORT, # Exposure mode #p48
  196. 'WhiteBalance' => self::SHORT, # White Balance #p49
  197. 'DigitalZoomRatio' => self::RATIONAL, # Digital zoom ration
  198. 'FocalLengthIn35mmFilm' => self::SHORT, # Focal length in 35 mm film
  199. 'SceneCaptureType' => self::SHORT, # Scene capture type #p49
  200. 'GainControl' => self::SHORT, # Scene control #p49-50
  201. 'Contrast' => self::SHORT, # Contrast #p50
  202. 'Saturation' => self::SHORT, # Saturation #p50
  203. 'Sharpness' => self::SHORT, # Sharpness #p50
  204. 'DeviceSettingDescription' => self::IGNORE,
  205. # Device settings description. This could maybe be supported. Need to find an
  206. # example file that uses this to see if it has stuff of interest in it.
  207. 'SubjectDistanceRange' => self::SHORT, # Subject distance range #p51
  208. 'ImageUniqueID' => self::ASCII, # Unique image ID
  209. ],
  210. # GPS Attribute Information (p52)
  211. 'GPS' => [
  212. 'GPSVersion' => self::UNDEFINED,
  213. # Should be an array of 4 Exif::BYTE's. However php treats it as an undefined
  214. # Note exif standard calls this GPSVersionID, but php doesn't like the id suffix
  215. 'GPSLatitudeRef' => self::ASCII, # North or South Latitude #p52-53
  216. 'GPSLatitude' => [ self::RATIONAL, 3 ], # Latitude
  217. 'GPSLongitudeRef' => self::ASCII, # East or West Longitude #p53
  218. 'GPSLongitude' => [ self::RATIONAL, 3 ], # Longitude
  219. 'GPSAltitudeRef' => self::UNDEFINED,
  220. # Altitude reference. Note, the exif standard says this should be an EXIF::Byte,
  221. # but php seems to disagree.
  222. 'GPSAltitude' => self::RATIONAL, # Altitude
  223. 'GPSTimeStamp' => [ self::RATIONAL, 3 ], # GPS time (atomic clock)
  224. 'GPSSatellites' => self::ASCII, # Satellites used for measurement
  225. 'GPSStatus' => self::ASCII, # Receiver status #p54
  226. 'GPSMeasureMode' => self::ASCII, # Measurement mode #p54-55
  227. 'GPSDOP' => self::RATIONAL, # Measurement precision
  228. 'GPSSpeedRef' => self::ASCII, # Speed unit #p55
  229. 'GPSSpeed' => self::RATIONAL, # Speed of GPS receiver
  230. 'GPSTrackRef' => self::ASCII, # Reference for direction of movement #p55
  231. 'GPSTrack' => self::RATIONAL, # Direction of movement
  232. 'GPSImgDirectionRef' => self::ASCII, # Reference for direction of image #p56
  233. 'GPSImgDirection' => self::RATIONAL, # Direction of image
  234. 'GPSMapDatum' => self::ASCII, # Geodetic survey data used
  235. 'GPSDestLatitudeRef' => self::ASCII, # Reference for latitude of destination #p56
  236. 'GPSDestLatitude' => [ self::RATIONAL, 3 ], # Latitude destination
  237. 'GPSDestLongitudeRef' => self::ASCII, # Reference for longitude of destination #p57
  238. 'GPSDestLongitude' => [ self::RATIONAL, 3 ], # Longitude of destination
  239. 'GPSDestBearingRef' => self::ASCII, # Reference for bearing of destination #p57
  240. 'GPSDestBearing' => self::RATIONAL, # Bearing of destination
  241. 'GPSDestDistanceRef' => self::ASCII, # Reference for distance to destination #p57-58
  242. 'GPSDestDistance' => self::RATIONAL, # Distance to destination
  243. 'GPSProcessingMethod' => self::UNDEFINED, # Name of GPS processing method
  244. 'GPSAreaInformation' => self::UNDEFINED, # Name of GPS area
  245. 'GPSDateStamp' => self::ASCII, # GPS date
  246. 'GPSDifferential' => self::SHORT, # GPS differential correction
  247. ],
  248. ];
  249. $this->file = $file;
  250. $this->basename = wfBaseName( $this->file );
  251. if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
  252. $this->byteOrder = $byteOrder;
  253. } else {
  254. // Only give a warning for b/c, since originally we didn't
  255. // require this. The number of things affected by this is
  256. // rather small.
  257. wfWarn( 'Exif class did not have byte order specified. ' .
  258. 'Some properties may be decoded incorrectly.' );
  259. $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's.
  260. }
  261. $this->debugFile( __FUNCTION__, true );
  262. if ( function_exists( 'exif_read_data' ) ) {
  263. Wikimedia\suppressWarnings();
  264. $data = exif_read_data( $this->file, 0, true );
  265. Wikimedia\restoreWarnings();
  266. } else {
  267. throw new MWException( "Internal error: exif_read_data not present. " .
  268. "\$wgShowEXIF may be incorrectly set or not checked by an extension." );
  269. }
  270. /**
  271. * exif_read_data() will return false on invalid input, such as
  272. * when somebody uploads a file called something.jpeg
  273. * containing random gibberish.
  274. */
  275. $this->mRawExifData = $data ?: [];
  276. $this->makeFilteredData();
  277. $this->collapseData();
  278. $this->debugFile( __FUNCTION__, false );
  279. }
  280. /**
  281. * Make $this->mFilteredExifData
  282. */
  283. function makeFilteredData() {
  284. $this->mFilteredExifData = [];
  285. foreach ( array_keys( $this->mRawExifData ) as $section ) {
  286. if ( !array_key_exists( $section, $this->mExifTags ) ) {
  287. $this->debug( $section, __FUNCTION__, "'$section' is not a valid Exif section" );
  288. continue;
  289. }
  290. foreach ( array_keys( $this->mRawExifData[$section] ) as $tag ) {
  291. if ( !array_key_exists( $tag, $this->mExifTags[$section] ) ) {
  292. $this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" );
  293. continue;
  294. }
  295. $this->mFilteredExifData[$tag] = $this->mRawExifData[$section][$tag];
  296. // This is ok, as the tags in the different sections do not conflict.
  297. // except in computed and thumbnail section, which we don't use.
  298. $value = $this->mRawExifData[$section][$tag];
  299. if ( !$this->validate( $section, $tag, $value ) ) {
  300. $this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" );
  301. unset( $this->mFilteredExifData[$tag] );
  302. }
  303. }
  304. }
  305. }
  306. /**
  307. * Collapse some fields together.
  308. * This converts some fields from exif form, to a more friendly form.
  309. * For example GPS latitude to a single number.
  310. *
  311. * The rationale behind this is that we're storing data, not presenting to the user
  312. * For example a longitude is a single number describing how far away you are from
  313. * the prime meridian. Well it might be nice to split it up into minutes and seconds
  314. * for the user, it doesn't really make sense to split a single number into 4 parts
  315. * for storage. (degrees, minutes, second, direction vs single floating point number).
  316. *
  317. * Other things this might do (not really sure if they make sense or not):
  318. * Dates -> mediawiki date format.
  319. * convert values that can be in different units to be in one standardized unit.
  320. *
  321. * As an alternative approach, some of this could be done in the validate phase
  322. * if we make up our own types like Exif::DATE.
  323. */
  324. function collapseData() {
  325. $this->exifGPStoNumber( 'GPSLatitude' );
  326. $this->exifGPStoNumber( 'GPSDestLatitude' );
  327. $this->exifGPStoNumber( 'GPSLongitude' );
  328. $this->exifGPStoNumber( 'GPSDestLongitude' );
  329. if ( isset( $this->mFilteredExifData['GPSAltitude'] )
  330. && isset( $this->mFilteredExifData['GPSAltitudeRef'] )
  331. ) {
  332. // We know altitude data is a <num>/<denom> from the validation
  333. // functions ran earlier. But multiplying such a string by -1
  334. // doesn't work well, so convert.
  335. list( $num, $denom ) = explode( '/', $this->mFilteredExifData['GPSAltitude'] );
  336. $this->mFilteredExifData['GPSAltitude'] = $num / $denom;
  337. if ( $this->mFilteredExifData['GPSAltitudeRef'] === "\1" ) {
  338. $this->mFilteredExifData['GPSAltitude'] *= -1;
  339. }
  340. unset( $this->mFilteredExifData['GPSAltitudeRef'] );
  341. }
  342. $this->exifPropToOrd( 'FileSource' );
  343. $this->exifPropToOrd( 'SceneType' );
  344. $this->charCodeString( 'UserComment' );
  345. $this->charCodeString( 'GPSProcessingMethod' );
  346. $this->charCodeString( 'GPSAreaInformation' );
  347. // ComponentsConfiguration should really be an array instead of a string...
  348. // This turns a string of binary numbers into an array of numbers.
  349. if ( isset( $this->mFilteredExifData['ComponentsConfiguration'] ) ) {
  350. $val = $this->mFilteredExifData['ComponentsConfiguration'];
  351. $ccVals = [];
  352. $strLen = strlen( $val );
  353. for ( $i = 0; $i < $strLen; $i++ ) {
  354. $ccVals[$i] = ord( substr( $val, $i, 1 ) );
  355. }
  356. $ccVals['_type'] = 'ol'; // this is for formatting later.
  357. $this->mFilteredExifData['ComponentsConfiguration'] = $ccVals;
  358. }
  359. // GPSVersion(ID) is treated as the wrong type by php exif support.
  360. // Go through each byte turning it into a version string.
  361. // For example: "\x02\x02\x00\x00" -> "2.2.0.0"
  362. // Also change exif tag name from GPSVersion (what php exif thinks it is)
  363. // to GPSVersionID (what the exif standard thinks it is).
  364. if ( isset( $this->mFilteredExifData['GPSVersion'] ) ) {
  365. $val = $this->mFilteredExifData['GPSVersion'];
  366. $newVal = '';
  367. $strLen = strlen( $val );
  368. for ( $i = 0; $i < $strLen; $i++ ) {
  369. if ( $i !== 0 ) {
  370. $newVal .= '.';
  371. }
  372. $newVal .= ord( substr( $val, $i, 1 ) );
  373. }
  374. if ( $this->byteOrder === 'LE' ) {
  375. // Need to reverse the string
  376. $newVal2 = '';
  377. for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- ) {
  378. $newVal2 .= substr( $newVal, $i, 1 );
  379. }
  380. $this->mFilteredExifData['GPSVersionID'] = $newVal2;
  381. } else {
  382. $this->mFilteredExifData['GPSVersionID'] = $newVal;
  383. }
  384. unset( $this->mFilteredExifData['GPSVersion'] );
  385. }
  386. }
  387. /**
  388. * Do userComment tags and similar. See pg. 34 of exif standard.
  389. * basically first 8 bytes is charset, rest is value.
  390. * This has not been tested on any shift-JIS strings.
  391. * @param string $prop Prop name
  392. */
  393. private function charCodeString( $prop ) {
  394. if ( isset( $this->mFilteredExifData[$prop] ) ) {
  395. if ( strlen( $this->mFilteredExifData[$prop] ) <= 8 ) {
  396. // invalid. Must be at least 9 bytes long.
  397. $this->debug( $this->mFilteredExifData[$prop], __FUNCTION__, false );
  398. unset( $this->mFilteredExifData[$prop] );
  399. return;
  400. }
  401. $charCode = substr( $this->mFilteredExifData[$prop], 0, 8 );
  402. $val = substr( $this->mFilteredExifData[$prop], 8 );
  403. switch ( $charCode ) {
  404. case "JIS\x00\x00\x00\x00\x00":
  405. $charset = "Shift-JIS";
  406. break;
  407. case "UNICODE\x00":
  408. $charset = "UTF-16" . $this->byteOrder;
  409. break;
  410. default: // ascii or undefined.
  411. $charset = "";
  412. break;
  413. }
  414. if ( $charset ) {
  415. Wikimedia\suppressWarnings();
  416. $val = iconv( $charset, 'UTF-8//IGNORE', $val );
  417. Wikimedia\restoreWarnings();
  418. } else {
  419. // if valid utf-8, assume that, otherwise assume windows-1252
  420. $valCopy = $val;
  421. UtfNormal\Validator::quickIsNFCVerify( $valCopy ); // validates $valCopy.
  422. if ( $valCopy !== $val ) {
  423. Wikimedia\suppressWarnings();
  424. $val = iconv( 'Windows-1252', 'UTF-8//IGNORE', $val );
  425. Wikimedia\restoreWarnings();
  426. }
  427. }
  428. // trim and check to make sure not only whitespace.
  429. $val = trim( $val );
  430. if ( strlen( $val ) === 0 ) {
  431. // only whitespace.
  432. $this->debug( $this->mFilteredExifData[$prop], __FUNCTION__, "$prop: Is only whitespace" );
  433. unset( $this->mFilteredExifData[$prop] );
  434. return;
  435. }
  436. // all's good.
  437. $this->mFilteredExifData[$prop] = $val;
  438. }
  439. }
  440. /**
  441. * Convert an Exif::UNDEFINED from a raw binary string
  442. * to its value. This is sometimes needed depending on
  443. * the type of UNDEFINED field
  444. * @param string $prop Name of property
  445. */
  446. private function exifPropToOrd( $prop ) {
  447. if ( isset( $this->mFilteredExifData[$prop] ) ) {
  448. $this->mFilteredExifData[$prop] = ord( $this->mFilteredExifData[$prop] );
  449. }
  450. }
  451. /**
  452. * Convert gps in exif form to a single floating point number
  453. * for example 10 degress 20`40`` S -> -10.34444
  454. * @param string $prop A GPS coordinate exif tag name (like GPSLongitude)
  455. */
  456. private function exifGPStoNumber( $prop ) {
  457. $loc =& $this->mFilteredExifData[$prop];
  458. $dir =& $this->mFilteredExifData[$prop . 'Ref'];
  459. $res = false;
  460. if ( isset( $loc ) && isset( $dir )
  461. && ( $dir === 'N' || $dir === 'S' || $dir === 'E' || $dir === 'W' )
  462. ) {
  463. list( $num, $denom ) = explode( '/', $loc[0] );
  464. $res = $num / $denom;
  465. list( $num, $denom ) = explode( '/', $loc[1] );
  466. $res += ( $num / $denom ) * ( 1 / 60 );
  467. list( $num, $denom ) = explode( '/', $loc[2] );
  468. $res += ( $num / $denom ) * ( 1 / 3600 );
  469. if ( $dir === 'S' || $dir === 'W' ) {
  470. $res *= -1; // make negative
  471. }
  472. }
  473. // update the exif records.
  474. if ( $res !== false ) { // using !== as $res could potentially be 0
  475. $this->mFilteredExifData[$prop] = $res;
  476. unset( $this->mFilteredExifData[$prop . 'Ref'] );
  477. } else { // if invalid
  478. unset( $this->mFilteredExifData[$prop] );
  479. unset( $this->mFilteredExifData[$prop . 'Ref'] );
  480. }
  481. }
  482. /** #@- */
  483. /** #@+
  484. * @return array
  485. */
  486. /**
  487. * Get $this->mRawExifData
  488. * @return array
  489. */
  490. function getData() {
  491. return $this->mRawExifData;
  492. }
  493. /**
  494. * Get $this->mFilteredExifData
  495. * @return array
  496. */
  497. function getFilteredData() {
  498. return $this->mFilteredExifData;
  499. }
  500. /** #@- */
  501. /**
  502. * The version of the output format
  503. *
  504. * Before the actual metadata information is saved in the database we
  505. * strip some of it since we don't want to save things like thumbnails
  506. * which usually accompany Exif data. This value gets saved in the
  507. * database along with the actual Exif data, and if the version in the
  508. * database doesn't equal the value returned by this function the Exif
  509. * data is regenerated.
  510. *
  511. * @return int
  512. */
  513. public static function version() {
  514. return 2; // We don't need no bloddy constants!
  515. }
  516. /**
  517. * Validates if a tag value is of the type it should be according to the Exif spec
  518. *
  519. * @param mixed $in The input value to check
  520. * @return bool
  521. */
  522. private function isByte( $in ) {
  523. if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 255 ) {
  524. $this->debug( $in, __FUNCTION__, true );
  525. return true;
  526. } else {
  527. $this->debug( $in, __FUNCTION__, false );
  528. return false;
  529. }
  530. }
  531. /**
  532. * @param mixed $in The input value to check
  533. * @return bool
  534. */
  535. private function isASCII( $in ) {
  536. if ( is_array( $in ) ) {
  537. return false;
  538. }
  539. if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) {
  540. $this->debug( $in, __FUNCTION__, 'found a character not in our whitelist' );
  541. return false;
  542. }
  543. if ( preg_match( '/^\s*$/', $in ) ) {
  544. $this->debug( $in, __FUNCTION__, 'input consisted solely of whitespace' );
  545. return false;
  546. }
  547. return true;
  548. }
  549. /**
  550. * @param mixed $in The input value to check
  551. * @return bool
  552. */
  553. private function isShort( $in ) {
  554. if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 65536 ) {
  555. $this->debug( $in, __FUNCTION__, true );
  556. return true;
  557. } else {
  558. $this->debug( $in, __FUNCTION__, false );
  559. return false;
  560. }
  561. }
  562. /**
  563. * @param mixed $in The input value to check
  564. * @return bool
  565. */
  566. private function isLong( $in ) {
  567. if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 4294967296 ) {
  568. $this->debug( $in, __FUNCTION__, true );
  569. return true;
  570. } else {
  571. $this->debug( $in, __FUNCTION__, false );
  572. return false;
  573. }
  574. }
  575. /**
  576. * @param mixed $in The input value to check
  577. * @return bool
  578. */
  579. private function isRational( $in ) {
  580. $m = [];
  581. # Avoid division by zero
  582. if ( !is_array( $in )
  583. && preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
  584. ) {
  585. return $this->isLong( $m[1] ) && $this->isLong( $m[2] );
  586. } else {
  587. $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
  588. return false;
  589. }
  590. }
  591. /**
  592. * @param mixed $in The input value to check
  593. * @return bool
  594. */
  595. private function isUndefined( $in ) {
  596. $this->debug( $in, __FUNCTION__, true );
  597. return true;
  598. }
  599. /**
  600. * @param mixed $in The input value to check
  601. * @return bool
  602. */
  603. private function isSlong( $in ) {
  604. if ( $this->isLong( abs( $in ) ) ) {
  605. $this->debug( $in, __FUNCTION__, true );
  606. return true;
  607. } else {
  608. $this->debug( $in, __FUNCTION__, false );
  609. return false;
  610. }
  611. }
  612. /**
  613. * @param mixed $in The input value to check
  614. * @return bool
  615. */
  616. private function isSrational( $in ) {
  617. $m = [];
  618. # Avoid division by zero
  619. if ( !is_array( $in ) &&
  620. preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
  621. ) {
  622. return $this->isSlong( $m[0] ) && $this->isSlong( $m[1] );
  623. } else {
  624. $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
  625. return false;
  626. }
  627. }
  628. /** #@- */
  629. /**
  630. * Validates if a tag has a legal value according to the Exif spec
  631. *
  632. * @param string $section Section where tag is located.
  633. * @param string $tag The tag to check.
  634. * @param mixed $val The value of the tag.
  635. * @param bool $recursive True if called recursively for array types.
  636. * @return bool
  637. */
  638. private function validate( $section, $tag, $val, $recursive = false ) {
  639. $debug = "tag is '$tag'";
  640. $etype = $this->mExifTags[$section][$tag];
  641. $ecount = 1;
  642. if ( is_array( $etype ) ) {
  643. list( $etype, $ecount ) = $etype;
  644. if ( $recursive ) {
  645. $ecount = 1; // checking individual elements
  646. }
  647. }
  648. $count = 1;
  649. if ( is_array( $val ) ) {
  650. $count = count( $val );
  651. if ( $ecount != $count ) {
  652. $this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" );
  653. return false;
  654. }
  655. }
  656. // If there are multiple values, recursively validate each of them.
  657. if ( $count > 1 ) {
  658. foreach ( $val as $v ) {
  659. if ( !$this->validate( $section, $tag, $v, true ) ) {
  660. return false;
  661. }
  662. }
  663. return true;
  664. }
  665. // Does not work if not typecast
  666. switch ( (string)$etype ) {
  667. case (string)self::BYTE:
  668. $this->debug( $val, __FUNCTION__, $debug );
  669. return $this->isByte( $val );
  670. case (string)self::ASCII:
  671. $this->debug( $val, __FUNCTION__, $debug );
  672. return $this->isASCII( $val );
  673. case (string)self::SHORT:
  674. $this->debug( $val, __FUNCTION__, $debug );
  675. return $this->isShort( $val );
  676. case (string)self::LONG:
  677. $this->debug( $val, __FUNCTION__, $debug );
  678. return $this->isLong( $val );
  679. case (string)self::RATIONAL:
  680. $this->debug( $val, __FUNCTION__, $debug );
  681. return $this->isRational( $val );
  682. case (string)self::SHORT_OR_LONG:
  683. $this->debug( $val, __FUNCTION__, $debug );
  684. return $this->isShort( $val ) || $this->isLong( $val );
  685. case (string)self::UNDEFINED:
  686. $this->debug( $val, __FUNCTION__, $debug );
  687. return $this->isUndefined( $val );
  688. case (string)self::SLONG:
  689. $this->debug( $val, __FUNCTION__, $debug );
  690. return $this->isSlong( $val );
  691. case (string)self::SRATIONAL:
  692. $this->debug( $val, __FUNCTION__, $debug );
  693. return $this->isSrational( $val );
  694. case (string)self::IGNORE:
  695. $this->debug( $val, __FUNCTION__, $debug );
  696. return false;
  697. default:
  698. $this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );
  699. return false;
  700. }
  701. }
  702. /**
  703. * Convenience function for debugging output
  704. *
  705. * @param mixed $in Arrays will be processed with print_r().
  706. * @param string $fname Function name to log.
  707. * @param string|bool|null $action Default null.
  708. */
  709. private function debug( $in, $fname, $action = null ) {
  710. if ( !$this->log ) {
  711. return;
  712. }
  713. $type = gettype( $in );
  714. $class = ucfirst( __CLASS__ );
  715. if ( is_array( $in ) ) {
  716. $in = print_r( $in, true );
  717. }
  718. if ( $action === true ) {
  719. wfDebugLog( $this->log, "$class::$fname: accepted: '$in' (type: $type)" );
  720. } elseif ( $action === false ) {
  721. wfDebugLog( $this->log, "$class::$fname: rejected: '$in' (type: $type)" );
  722. } elseif ( $action === null ) {
  723. wfDebugLog( $this->log, "$class::$fname: input was: '$in' (type: $type)" );
  724. } else {
  725. wfDebugLog( $this->log, "$class::$fname: $action (type: $type; content: '$in')" );
  726. }
  727. }
  728. /**
  729. * Convenience function for debugging output
  730. *
  731. * @param string $fname The name of the function calling this function
  732. * @param bool $io Specify whether we're beginning or ending
  733. */
  734. private function debugFile( $fname, $io ) {
  735. if ( !$this->log ) {
  736. return;
  737. }
  738. $class = ucfirst( __CLASS__ );
  739. if ( $io ) {
  740. wfDebugLog( $this->log, "$class::$fname: begin processing: '{$this->basename}'" );
  741. } else {
  742. wfDebugLog( $this->log, "$class::$fname: end processing: '{$this->basename}'" );
  743. }
  744. }
  745. }