IEUrlExtension.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. <?php
  2. /**
  3. * Checks for validity of requested URL's extension.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. /**
  23. * Internet Explorer derives a cache filename from a URL, and then in certain
  24. * circumstances, uses the extension of the resulting file to determine the
  25. * content type of the data, ignoring the Content-Type header.
  26. *
  27. * This can be a problem, especially when non-HTML content is sent by MediaWiki,
  28. * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
  29. *
  30. * Usually the script filename (e.g. api.php) is present in the URL, and this
  31. * makes Internet Explorer think the extension is a harmless script extension.
  32. * But Internet Explorer 6 and earlier allows the script extension to be
  33. * obscured by encoding the dot as "%2E".
  34. *
  35. * This class contains functions which help in detecting and dealing with this
  36. * situation.
  37. *
  38. * Checking the URL for a bad extension is somewhat complicated due to the fact
  39. * that CGI doesn't provide a standard method to determine the URL. Instead it
  40. * is necessary to pass a subset of $_SERVER variables, which we then attempt
  41. * to use to guess parts of the URL.
  42. */
  43. class IEUrlExtension {
  44. /**
  45. * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
  46. * to see if it indicates that the request was sent with a bad file
  47. * extension. Returns true if the request should be denied or modified,
  48. * false otherwise. The relevant $_SERVER elements are:
  49. *
  50. * - SERVER_SOFTWARE
  51. * - REQUEST_URI
  52. * - QUERY_STRING
  53. * - PATH_INFO
  54. *
  55. * If the a variable is unset in $_SERVER, it should be unset in $vars.
  56. *
  57. * @param array $vars A subset of $_SERVER.
  58. * @param array $extWhitelist Extensions which are allowed, assumed harmless.
  59. * @return bool
  60. */
  61. public static function areServerVarsBad( $vars, $extWhitelist = [] ) {
  62. // Check QUERY_STRING or REQUEST_URI
  63. if ( isset( $vars['SERVER_SOFTWARE'] )
  64. && isset( $vars['REQUEST_URI'] )
  65. && self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] )
  66. ) {
  67. $urlPart = $vars['REQUEST_URI'];
  68. } elseif ( isset( $vars['QUERY_STRING'] ) ) {
  69. $urlPart = $vars['QUERY_STRING'];
  70. } else {
  71. $urlPart = '';
  72. }
  73. if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
  74. return true;
  75. }
  76. // Some servers have PATH_INFO but not REQUEST_URI, so we check both
  77. // to be on the safe side.
  78. if ( isset( $vars['PATH_INFO'] )
  79. && self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist )
  80. ) {
  81. return true;
  82. }
  83. // All checks passed
  84. return false;
  85. }
  86. /**
  87. * Given a right-hand portion of a URL, determine whether IE would detect
  88. * a potentially harmful file extension.
  89. *
  90. * @param string $urlPart The right-hand portion of a URL
  91. * @param array $extWhitelist An array of file extensions which may occur in this
  92. * URL, and which should be allowed.
  93. * @return bool
  94. */
  95. public static function isUrlExtensionBad( $urlPart, $extWhitelist = [] ) {
  96. if ( strval( $urlPart ) === '' ) {
  97. return false;
  98. }
  99. $extension = self::findIE6Extension( $urlPart );
  100. if ( strval( $extension ) === '' ) {
  101. // No extension or empty extension
  102. return false;
  103. }
  104. if ( in_array( $extension, [ 'php', 'php5' ] ) ) {
  105. // Script extension, OK
  106. return false;
  107. }
  108. if ( in_array( $extension, $extWhitelist ) ) {
  109. // Whitelisted extension
  110. return false;
  111. }
  112. if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
  113. // Non-alphanumeric extension, unlikely to be registered.
  114. // The regex above is known to match all registered file extensions
  115. // in a default Windows XP installation. It's important to allow
  116. // extensions with ampersands and percent signs, since that reduces
  117. // the number of false positives substantially.
  118. return false;
  119. }
  120. // Possibly bad extension
  121. return true;
  122. }
  123. /**
  124. * Returns a variant of $url which will pass isUrlExtensionBad() but has the
  125. * same GET parameters, or false if it can't figure one out.
  126. * @param string $url
  127. * @param array $extWhitelist
  128. * @return bool|string
  129. */
  130. public static function fixUrlForIE6( $url, $extWhitelist = [] ) {
  131. $questionPos = strpos( $url, '?' );
  132. if ( $questionPos === false ) {
  133. $beforeQuery = $url . '?';
  134. $query = '';
  135. } elseif ( $questionPos === strlen( $url ) - 1 ) {
  136. $beforeQuery = $url;
  137. $query = '';
  138. } else {
  139. $beforeQuery = substr( $url, 0, $questionPos + 1 );
  140. $query = substr( $url, $questionPos + 1 );
  141. }
  142. // Multiple question marks cause problems. Encode the second and
  143. // subsequent question mark.
  144. $query = str_replace( '?', '%3E', $query );
  145. // Append an invalid path character so that IE6 won't see the end of the
  146. // query string as an extension
  147. $query .= '&*';
  148. // Put the URL back together
  149. $url = $beforeQuery . $query;
  150. if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
  151. // Avoid a redirect loop
  152. return false;
  153. }
  154. return $url;
  155. }
  156. /**
  157. * Determine what extension IE6 will infer from a certain query string.
  158. * If the URL has an extension before the question mark, IE6 will use
  159. * that and ignore the query string, but per the comment at
  160. * isPathInfoBad() we don't have a reliable way to determine the URL,
  161. * so isPathInfoBad() just passes in the query string for $url.
  162. * All entry points have safe extensions (php, php5) anyway, so
  163. * checking the query string is possibly overly paranoid but never
  164. * insecure.
  165. *
  166. * The criteria for finding an extension are as follows:
  167. * - a possible extension is a dot followed by one or more characters not
  168. * in <>\"/:|?.#
  169. * - if we find a possible extension followed by the end of the string or
  170. * a #, that's our extension
  171. * - if we find a possible extension followed by a ?, that's our extension
  172. * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
  173. * searching for another possible extension
  174. * - if we find a possible extension followed by a dot or another illegal
  175. * character, we ignore it and continue searching
  176. *
  177. * @param string $url
  178. * @return mixed Detected extension (string), or false if none found
  179. */
  180. public static function findIE6Extension( $url ) {
  181. $pos = 0;
  182. $hashPos = strpos( $url, '#' );
  183. if ( $hashPos !== false ) {
  184. $urlLength = $hashPos;
  185. } else {
  186. $urlLength = strlen( $url );
  187. }
  188. $remainingLength = $urlLength;
  189. while ( $remainingLength > 0 ) {
  190. // Skip ahead to the next dot
  191. $pos += strcspn( $url, '.', $pos, $remainingLength );
  192. if ( $pos >= $urlLength ) {
  193. // End of string, we're done
  194. return false;
  195. }
  196. // We found a dot. Skip past it
  197. $pos++;
  198. $remainingLength = $urlLength - $pos;
  199. // Check for illegal characters in our prospective extension,
  200. // or for another dot
  201. $nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
  202. if ( $nextPos >= $urlLength ) {
  203. // No illegal character or next dot
  204. // We have our extension
  205. return substr( $url, $pos, $urlLength - $pos );
  206. }
  207. if ( $url[$nextPos] === '?' ) {
  208. // We've found a legal extension followed by a question mark
  209. // If the extension is NOT exe, dll or cgi, return it
  210. $extension = substr( $url, $pos, $nextPos - $pos );
  211. if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
  212. strcasecmp( $extension, 'cgi' )
  213. ) {
  214. return $extension;
  215. }
  216. // Else continue looking
  217. }
  218. // We found an illegal character or another dot
  219. // Skip to that character and continue the loop
  220. $pos = $nextPos;
  221. $remainingLength = $urlLength - $pos;
  222. }
  223. return false;
  224. }
  225. /**
  226. * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
  227. * returns true if that server is known to have a REQUEST_URI variable
  228. * with %2E not decoded to ".". On such a server, it is possible to detect
  229. * whether the script filename has been obscured.
  230. *
  231. * The function returns false if the server is not known to have this
  232. * behavior. Microsoft IIS in particular is known to decode escaped script
  233. * filenames.
  234. *
  235. * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
  236. * or a specification in the style of a User-Agent header, such as
  237. * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
  238. *
  239. * @param string $serverSoftware
  240. * @return bool
  241. */
  242. public static function haveUndecodedRequestUri( $serverSoftware ) {
  243. static $whitelist = [
  244. 'Apache',
  245. 'Zeus',
  246. 'LiteSpeed' ];
  247. if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
  248. return in_array( $m[1], $whitelist );
  249. } else {
  250. return false;
  251. }
  252. }
  253. }