OutputHandler.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. <?php
  /**
   * Standard output handler for use with ob_start.
   *
   * Processing steps, in order:
   *  - neutralise Flash cross-domain policy tags via wfMangleFlashPolicy();
   *  - if $wgValidateAllHtml is set and the response is HTML, pass the
   *    buffer through wfHtmlValidationHandler();
   *  - unless compression is disabled ($wgDisableOutputCompression) or zlib
   *    output compression is already active, gzip the buffer (skipped when
   *    MW_NO_OUTPUT_COMPRESSION is defined) and, when no 'output_handler' ini
   *    setting is configured, let wfDoContentLength() emit a Content-Length.
   *
   * @param string $s Buffered output
   * @return string Output to send to the client
   */
  function wfOutputHandler( $s ) {
      global $wgDisableOutputCompression, $wgValidateAllHtml;

      $s = wfMangleFlashPolicy( $s );

      if ( $wgValidateAllHtml ) {
          // headers_list() works under every SAPI, whereas
          // apache_response_headers() is undefined outside a few SAPIs and
          // may return false. The response is treated as HTML unless a
          // Content-Type header says otherwise.
          $isHTML = true;
          foreach ( headers_list() as $header ) {
              $parts = explode( ':', $header, 2 );
              if ( count( $parts ) !== 2 ) {
                  continue;
              }
              if ( strtolower( trim( $parts[0] ) ) !== 'content-type' ) {
                  continue;
              }
              // MIME type names are case-insensitive, hence stripos().
              if ( stripos( $parts[1], 'text/html' ) === false
                  && stripos( $parts[1], 'application/xhtml+xml' ) === false
              ) {
                  $isHTML = false;
                  break;
              }
          }
          if ( $isHTML ) {
              $s = wfHtmlValidationHandler( $s );
          }
      }

      if ( !$wgDisableOutputCompression && !ini_get( 'zlib.output_compression' ) ) {
          if ( !defined( 'MW_NO_OUTPUT_COMPRESSION' ) ) {
              $s = wfGzipHandler( $s );
          }
          if ( !ini_get( 'output_handler' ) ) {
              // Length is taken after compression so it matches the bytes sent.
              wfDoContentLength( strlen( $s ) );
          }
      }

      return $s;
  }
  /**
   * Get the "file extension" that some client apps will estimate from
   * the currently-requested URL.
   *
   * This isn't on WebRequest because we need it when things aren't initialized.
   *
   * @note This partly duplicates logic in WebRequest::getRequestUrl().
   * @private
   * @return string Lower-cased extension including the leading period, or an
   *   empty string when the path has no period or cannot be determined
   */
  function wfRequestExtension() {
      if ( isset( $_SERVER['REQUEST_URI'] ) ) {
          // Keep only the part in front of the query string.
          $pieces = explode( '?', $_SERVER['REQUEST_URI'], 2 );
          $requestPath = $pieces[0];
      } elseif ( isset( $_SERVER['SCRIPT_NAME'] ) ) {
          // Probably IIS, where QUERY_STRING is supplied separately.
          $requestPath = $_SERVER['SCRIPT_NAME'];
      } else {
          // The server gave us nothing to derive a path from.
          return '';
      }

      $dotPos = strrpos( $requestPath, '.' );

      return $dotPos === false ? '' : strtolower( substr( $requestPath, $dotPos ) );
  }
  /**
   * Handler that compresses data with gzip if allowed by the Accept-Encoding
   * request header. Unlike ob_gzhandler, it works for HEAD requests too.
   *
   * The buffer is returned unchanged when gzencode() is unavailable, when
   * headers have already been sent, when the requested URL ends in .gz/.tgz,
   * or when the client does not accept gzip. A Vary header (plus
   * X-Vary-Options) is added unless a Vary header is already present.
   *
   * @param string $s Buffered output
   * @return string Possibly gzip-compressed output
   */
  function wfGzipHandler( $s ) {
      if ( !function_exists( 'gzencode' ) || headers_sent() ) {
          return $s;
      }

      $ext = wfRequestExtension();
      if ( $ext == '.gz' || $ext == '.tgz' ) {
          // Don't do gzip compression if the URL path ends in .gz or .tgz
          // This confuses Safari and triggers a download of the page,
          // even though it's pretty clearly labeled as viewable HTML.
          return $s;
      }

      if ( isset( $_SERVER['HTTP_ACCEPT_ENCODING'] ) ) {
          $acceptsGzip = false;
          foreach ( explode( ',', $_SERVER['HTTP_ACCEPT_ENCODING'] ) as $entry ) {
              $params = explode( ';', $entry );
              // Content-coding names are case-insensitive.
              if ( strtolower( trim( $params[0] ) ) !== 'gzip' ) {
                  continue;
              }
              $acceptsGzip = true;
              // A weight of zero ("gzip;q=0") is an explicit refusal.
              foreach ( array_slice( $params, 1 ) as $param ) {
                  if ( preg_match( '/^\s*q\s*=\s*([0-9.]+)\s*$/i', $param, $m )
                      && (float)$m[1] == 0
                  ) {
                      $acceptsGzip = false;
                  }
              }
              break;
          }

          if ( $acceptsGzip ) {
              $compressed = gzencode( $s, 3 );
              // Only advertise gzip once compression has actually succeeded,
              // otherwise the client would receive a mislabelled body.
              if ( $compressed !== false ) {
                  header( 'Content-Encoding: gzip' );
                  $s = $compressed;
              }
          }
      }

      // Set vary header if it hasn't been set already. Header names are
      // case-insensitive, so compare without regard to case.
      $foundVary = false;
      foreach ( headers_list() as $header ) {
          if ( strncasecmp( $header, 'Vary:', 5 ) === 0 ) {
              $foundVary = true;
              break;
          }
      }
      if ( !$foundVary ) {
          header( 'Vary: Accept-Encoding' );
          header( 'X-Vary-Options: Accept-Encoding;list-contains=gzip' );
      }

      return $s;
  }
  /**
   * Mangle flash policy tags which open up the site to XSS attacks.
   *
   * Every opening <cross-domain-policy ...> tag, with or without attributes
   * or surrounding whitespace, is renamed to <NOT-cross-domain-policy ...>.
   * Text without such a tag is returned untouched.
   *
   * @param string $s Text to sanitise
   * @return string Sanitised text
   */
  function wfMangleFlashPolicy( $s ) {
      // The lookahead requires whitespace or '>' right after the tag name, so
      // tags carrying attributes are caught too while longer tag names that
      // merely start with "cross-domain-policy" are left alone.
      $pattern = '/\<\s*cross-domain-policy(?=\s|\>)/i';

      # Avoid weird excessive memory usage in PCRE on big articles
      if ( preg_match( $pattern, $s ) ) {
          return preg_replace( $pattern, '<NOT-cross-domain-policy', $s );
      }

      return $s;
  }
  /**
   * Add a Content-Length header if possible. This makes it cooperate with
   * squid better.
   *
   * The header is only emitted while headers can still be sent and the
   * request's SERVER_PROTOCOL is HTTP/1.0.
   *
   * @param int $length Body length in bytes
   */
  function wfDoContentLength( $length ) {
      if ( headers_sent() ) {
          // Too late to add anything.
          return;
      }

      if ( !isset( $_SERVER['SERVER_PROTOCOL'] ) ) {
          return;
      }

      if ( $_SERVER['SERVER_PROTOCOL'] != 'HTTP/1.0' ) {
          return;
      }

      header( "Content-Length: $length" );
  }
  /**
   * Replace the output with an error if the HTML is not valid.
   *
   * When MWTidy::checkErrors() reports success the text is returned as is.
   * Otherwise an XHTML report page is built containing: a linked list of the
   * errors that start with "line N", the raw error text, and a numbered
   * listing of the source in which each offending line is highlighted and
   * anchored as "line-N".
   *
   * @param string $s HTML to validate
   * @return string The original HTML, or the error report page
   */
  function wfHtmlValidationHandler( $s ) {
      // Presumably filled in by reference by MWTidy::checkErrors(); it is
      // consumed below as newline-separated text.
      $errors = '';
      if ( MWTidy::checkErrors( $s, $errors ) ) {
          return $s;
      }

      if ( !headers_sent() ) {
          header( 'Cache-Control: no-cache' );
      }

      $out = <<<EOT
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<title>HTML validation error</title>
<style>
.highlight { background-color: #ffc }
li { white-space: pre }
</style>
</head>
<body>
<h1>HTML validation error</h1>
<ul>
EOT;

      // Collect the line numbers mentioned by the validator and link each
      // message to the matching source line further down the page.
      $badLines = array();
      foreach ( explode( "\n", $errors ) as $error ) {
          if ( preg_match( '/^line (\d+)/', $error, $m ) ) {
              $lineNum = intval( $m[1] );
              $badLines[$lineNum] = true;
              $out .= "<li><a href=\"#line-{$lineNum}\">" . htmlspecialchars( $error ) . "</a></li>\n";
          }
      }

      $out .= '</ul>';
      $out .= '<pre>' . htmlspecialchars( $errors ) . '</pre>';
      $out .= "<ol>\n";

      // explode() keeps blank lines, unlike strtok() which silently skips
      // them and would shift every following line number away from the
      // numbering used in the error messages.
      $lines = explode( "\n", $s );
      if ( end( $lines ) === '' ) {
          // A trailing newline does not start another source line.
          array_pop( $lines );
      }

      foreach ( $lines as $index => $line ) {
          $i = $index + 1;
          if ( isset( $badLines[$i] ) ) {
              $out .= "<li class=\"highlight\" id=\"line-$i\">";
          } else {
              $out .= '<li>';
          }
          $out .= htmlspecialchars( $line ) . "</li>\n";
      }

      $out .= '</ol></body></html>';

      return $out;
  }