findHooks.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. <?php
  2. /**
  3. * Simple script that tries to find documented hooks and hooks actually
  4. * in the code, and shows what's missing.
  5. *
  6. * This script assumes that:
  7. * - hook names in hooks.txt are at the beginning of a line and single quoted.
  8. * - hook names in code are the first parameter of wfRunHooks, Hooks::run or ContentHandler::runLegacyHooks.
  9. *
  10. * If the --online option is passed, the script will compare the hooks in the code
  11. * with the ones at https://www.mediawiki.org/wiki/Manual:Hooks
  12. *
  13. * Any hook call that doesn't meet these assumptions will be noted.
  14. *
  15. * Copyright © Antoine Musso
  16. *
  17. * This program is free software; you can redistribute it and/or modify
  18. * it under the terms of the GNU General Public License as published by
  19. * the Free Software Foundation; either version 2 of the License, or
  20. * (at your option) any later version.
  21. *
  22. * This program is distributed in the hope that it will be useful,
  23. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  25. * GNU General Public License for more details.
  26. *
  27. * You should have received a copy of the GNU General Public License along
  28. * with this program; if not, write to the Free Software Foundation, Inc.,
  29. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  30. * http://www.gnu.org/copyleft/gpl.html
  31. *
  32. * @file
  33. * @ingroup Maintenance
  34. * @author Antoine Musso <hashar at free dot fr>
  35. */
  36. require_once __DIR__ . '/Maintenance.php';
  37. /**
  38. * Maintenance script that compares documented and actually present mismatches.
  39. *
  40. * @ingroup Maintenance
  41. */
  42. class FindHooks extends Maintenance {
  43. const FIND_NON_RECURSIVE = 0;
  44. const FIND_RECURSIVE = 1;
  45. /*
  46. * Hooks that are ignored
  47. */
  48. protected static $ignore = [ 'testRunLegacyHooks', 'Test' ];
  49. public function __construct() {
  50. parent::__construct();
  51. $this->addDescription( 'Find hooks that are undocumented, missing, or just plain wrong' );
  52. $this->addOption( 'online', 'Check against MediaWiki.org hook documentation' );
  53. }
  54. public function getDbType() {
  55. return Maintenance::DB_NONE;
  56. }
  57. public function execute() {
  58. global $IP;
  59. $documentedHooks = $this->getHooksFromDoc( $IP . '/docs/hooks.txt' );
  60. $potentialHooks = [];
  61. $badHooks = [];
  62. $recurseDirs = [
  63. "$IP/includes/",
  64. "$IP/mw-config/",
  65. "$IP/languages/",
  66. "$IP/maintenance/",
  67. // Omit $IP/tests/phpunit as it contains hook tests that shouldn't be documented
  68. "$IP/tests/parser",
  69. "$IP/tests/phpunit/suites",
  70. ];
  71. $nonRecurseDirs = [
  72. "$IP/",
  73. ];
  74. foreach ( $recurseDirs as $dir ) {
  75. $ret = $this->getHooksFromDir( $dir, self::FIND_RECURSIVE );
  76. $potentialHooks = array_merge( $potentialHooks, $ret['good'] );
  77. $badHooks = array_merge( $badHooks, $ret['bad'] );
  78. }
  79. foreach ( $nonRecurseDirs as $dir ) {
  80. $ret = $this->getHooksFromDir( $dir );
  81. $potentialHooks = array_merge( $potentialHooks, $ret['good'] );
  82. $badHooks = array_merge( $badHooks, $ret['bad'] );
  83. }
  84. $documented = array_keys( $documentedHooks );
  85. $potential = array_keys( $potentialHooks );
  86. $potential = array_unique( $potential );
  87. $badHooks = array_diff( array_unique( $badHooks ), self::$ignore );
  88. $todo = array_diff( $potential, $documented, self::$ignore );
  89. $deprecated = array_diff( $documented, $potential, self::$ignore );
  90. // Check parameter count and references
  91. $badParameterCount = $badParameterReference = [];
  92. foreach ( $potentialHooks as $hook => $args ) {
  93. if ( !isset( $documentedHooks[$hook] ) ) {
  94. // Not documented, but that will also be in $todo
  95. continue;
  96. }
  97. $argsDoc = $documentedHooks[$hook];
  98. if ( $args === 'unknown' || $argsDoc === 'unknown' ) {
  99. // Could not get parameter information
  100. continue;
  101. }
  102. if ( count( $argsDoc ) !== count( $args ) ) {
  103. $badParameterCount[] = $hook . ': Doc: ' . count( $argsDoc ) . ' vs. Code: ' . count( $args );
  104. } else {
  105. // Check if & is equal
  106. foreach ( $argsDoc as $index => $argDoc ) {
  107. $arg = $args[$index];
  108. if ( ( $arg[0] === '&' ) !== ( $argDoc[0] === '&' ) ) {
  109. $badParameterReference[] = $hook . ': References different: Doc: ' . $argDoc .
  110. ' vs. Code: ' . $arg;
  111. }
  112. }
  113. }
  114. }
  115. // Print the results
  116. $this->printArray( 'Undocumented', $todo );
  117. $this->printArray( 'Documented and not found', $deprecated );
  118. $this->printArray( 'Unclear hook calls', $badHooks );
  119. $this->printArray( 'Different parameter count', $badParameterCount );
  120. $this->printArray( 'Different parameter reference', $badParameterReference );
  121. if ( !$todo && !$deprecated && !$badHooks
  122. && !$badParameterCount && !$badParameterReference
  123. ) {
  124. $this->output( "Looks good!\n" );
  125. } else {
  126. $this->error( 'The script finished with errors.', 1 );
  127. }
  128. }
  129. /**
  130. * Get the hook documentation, either locally or from MediaWiki.org
  131. * @param string $doc
  132. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  133. */
  134. private function getHooksFromDoc( $doc ) {
  135. if ( $this->hasOption( 'online' ) ) {
  136. return $this->getHooksFromOnlineDoc();
  137. } else {
  138. return $this->getHooksFromLocalDoc( $doc );
  139. }
  140. }
  141. /**
  142. * Get hooks from a local file (for example docs/hooks.txt)
  143. * @param string $doc Filename to look in
  144. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  145. */
  146. private function getHooksFromLocalDoc( $doc ) {
  147. $m = [];
  148. $content = file_get_contents( $doc );
  149. preg_match_all(
  150. "/\n'(.*?)':.*((?:\n.+)*)/",
  151. $content,
  152. $m,
  153. PREG_SET_ORDER
  154. );
  155. // Extract the documented parameter
  156. $hooks = [];
  157. foreach ( $m as $match ) {
  158. $args = [];
  159. if ( isset( $match[2] ) ) {
  160. $n = [];
  161. if ( preg_match_all( "/\n(&?\\$\w+):.+/", $match[2], $n ) ) {
  162. $args = $n[1];
  163. }
  164. }
  165. $hooks[$match[1]] = $args;
  166. }
  167. return $hooks;
  168. }
  169. /**
  170. * Get hooks from www.mediawiki.org using the API
  171. * @return array Array: key => hook name; value => string 'unknown'
  172. */
  173. private function getHooksFromOnlineDoc() {
  174. $allhooks = $this->getHooksFromOnlineDocCategory( 'MediaWiki_hooks' );
  175. $removed = $this->getHooksFromOnlineDocCategory( 'Removed_hooks' );
  176. return array_diff_key( $allhooks, $removed );
  177. }
  178. /**
  179. * @param string $title
  180. * @return array
  181. */
  182. private function getHooksFromOnlineDocCategory( $title ) {
  183. $params = [
  184. 'action' => 'query',
  185. 'list' => 'categorymembers',
  186. 'cmtitle' => "Category:$title",
  187. 'cmlimit' => 500,
  188. 'format' => 'json',
  189. 'continue' => '',
  190. ];
  191. $retval = [];
  192. while ( true ) {
  193. $json = Http::get(
  194. wfAppendQuery( 'http://www.mediawiki.org/w/api.php', $params ),
  195. [],
  196. __METHOD__
  197. );
  198. $data = FormatJson::decode( $json, true );
  199. foreach ( $data['query']['categorymembers'] as $page ) {
  200. if ( preg_match( '/Manual\:Hooks\/([a-zA-Z0-9- :]+)/', $page['title'], $m ) ) {
  201. // parameters are unknown, because that needs parsing of wikitext
  202. $retval[str_replace( ' ', '_', $m[1] )] = 'unknown';
  203. }
  204. }
  205. if ( !isset( $data['continue'] ) ) {
  206. return $retval;
  207. }
  208. $params = array_replace( $params, $data['continue'] );
  209. }
  210. }
  211. /**
  212. * Get hooks from a PHP file
  213. * @param string $filePath Full file path to the PHP file.
  214. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  215. */
  216. private function getHooksFromFile( $filePath ) {
  217. $content = file_get_contents( $filePath );
  218. $m = [];
  219. preg_match_all(
  220. // All functions which runs hooks
  221. '/(?:wfRunHooks|Hooks\:\:run|ContentHandler\:\:runLegacyHooks)\s*\(\s*' .
  222. // First argument is the hook name as string
  223. '([\'"])(.*?)\1' .
  224. // Comma for second argument
  225. '(?:\s*(,))?' .
  226. // Second argument must start with array to be processed
  227. '(?:\s*(?:array\s*\(|\[)' .
  228. // Matching inside array - allows one deep of brackets
  229. '((?:[^\(\)\[\]]|\((?-1)\)|\[(?-1)\])*)' .
  230. // End
  231. '[\)\]])?/',
  232. $content,
  233. $m,
  234. PREG_SET_ORDER
  235. );
  236. // Extract parameter
  237. $hooks = [];
  238. foreach ( $m as $match ) {
  239. $args = [];
  240. if ( isset( $match[4] ) ) {
  241. $n = [];
  242. if ( preg_match_all( '/((?:[^,\(\)]|\([^\(\)]*\))+)/', $match[4], $n ) ) {
  243. $args = array_map( 'trim', $n[1] );
  244. }
  245. } elseif ( isset( $match[3] ) ) {
  246. // Found a parameter for Hooks::run,
  247. // but could not extract the hooks argument,
  248. // because there are given by a variable
  249. $args = 'unknown';
  250. }
  251. $hooks[$match[2]] = $args;
  252. }
  253. return $hooks;
  254. }
  255. /**
  256. * Get bad hooks (where the hook name could not be determined) from a PHP file
  257. * @param string $filePath Full filename to the PHP file.
  258. * @return array Array of bad wfRunHooks() lines
  259. */
  260. private function getBadHooksFromFile( $filePath ) {
  261. $content = file_get_contents( $filePath );
  262. $m = [];
  263. // We want to skip the "function wfRunHooks()" one. :)
  264. preg_match_all( '/(?<!function )wfRunHooks\(\s*[^\s\'"].*/', $content, $m );
  265. $list = [];
  266. foreach ( $m[0] as $match ) {
  267. $list[] = $match . "(" . $filePath . ")";
  268. }
  269. return $list;
  270. }
  271. /**
  272. * Get hooks from a directory of PHP files.
  273. * @param string $dir Directory path to start at
  274. * @param int $recursive Pass self::FIND_RECURSIVE
  275. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  276. */
  277. private function getHooksFromDir( $dir, $recurse = 0 ) {
  278. $good = [];
  279. $bad = [];
  280. if ( $recurse === self::FIND_RECURSIVE ) {
  281. $iterator = new RecursiveIteratorIterator(
  282. new RecursiveDirectoryIterator( $dir, RecursiveDirectoryIterator::SKIP_DOTS ),
  283. RecursiveIteratorIterator::SELF_FIRST
  284. );
  285. } else {
  286. $iterator = new DirectoryIterator( $dir );
  287. }
  288. foreach ( $iterator as $info ) {
  289. // Ignore directories, work only on php files,
  290. if ( $info->isFile() && in_array( $info->getExtension(), [ 'php', 'inc' ] )
  291. // Skip this file as it contains text that looks like a bad wfRunHooks() call
  292. && $info->getRealPath() !== __FILE__
  293. ) {
  294. $good = array_merge( $good, $this->getHooksFromFile( $info->getRealPath() ) );
  295. $bad = array_merge( $bad, $this->getBadHooksFromFile( $info->getRealPath() ) );
  296. }
  297. }
  298. return [ 'good' => $good, 'bad' => $bad ];
  299. }
  300. /**
  301. * Nicely sort an print an array
  302. * @param string $msg A message to show before the value
  303. * @param array $arr
  304. */
  305. private function printArray( $msg, $arr ) {
  306. asort( $arr );
  307. foreach ( $arr as $v ) {
  308. $this->output( "$msg: $v\n" );
  309. }
  310. }
  311. }
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file (presumably by
// Maintenance.php, required at the top) — confirm how it consumes $maintClass.
$maintClass = 'FindHooks';
require_once RUN_MAINTENANCE_IF_MAIN;