userDupes.inc 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. <?php
  2. /**
  3. * Helper class for update.php.
  4. *
  5. * Copyright © 2005 Brion Vibber <brion@pobox.com>
  6. * https://www.mediawiki.org/
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @file
  24. * @ingroup Maintenance
  25. */
  26. /**
  27. * Look for duplicate user table entries and optionally prune them.
  28. *
  29. * This is still used by our MysqlUpdater at:
  30. * includes/installer/MysqlUpdater.php
  31. *
  32. * @ingroup Maintenance
  33. */
  34. class UserDupes {
  35. private $db;
  36. private $reassigned;
  37. private $trimmed;
  38. private $failed;
  39. private $outputCallback;
  40. function __construct( &$database, $outputCallback ) {
  41. $this->db = $database;
  42. $this->outputCallback = $outputCallback;
  43. }
  44. /**
  45. * Output some text via the output callback provided
  46. * @param string $str Text to print
  47. */
  48. private function out( $str ) {
  49. call_user_func( $this->outputCallback, $str );
  50. }
  51. /**
  52. * Check if this database's user table has already had a unique
  53. * user_name index applied.
  54. * @return bool
  55. */
  56. function hasUniqueIndex() {
  57. $info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
  58. if ( !$info ) {
  59. $this->out( "WARNING: doesn't seem to have user_name index at all!\n" );
  60. return false;
  61. }
  62. # Confusingly, 'Non_unique' is 0 for *unique* indexes,
  63. # and 1 for *non-unique* indexes. Pass the crack, MySQL,
  64. # it's obviously some good stuff!
  65. return ( $info[0]->Non_unique == 0 );
  66. }
  67. /**
  68. * Checks the database for duplicate user account records
  69. * and remove them in preparation for application of a unique
  70. * index on the user_name field. Returns true if the table is
  71. * clean or if duplicates have been resolved automatically.
  72. *
  73. * May return false if there are unresolvable problems.
  74. * Status information will be echo'd to stdout.
  75. *
  76. * @return bool
  77. */
  78. function clearDupes() {
  79. return $this->checkDupes( true );
  80. }
  81. /**
  82. * Checks the database for duplicate user account records
  83. * in preparation for application of a unique index on the
  84. * user_name field. Returns true if the table is clean or
  85. * if duplicates can be resolved automatically.
  86. *
  87. * Returns false if there are duplicates and resolution was
  88. * not requested. (If doing resolution, edits may be reassigned.)
  89. * Status information will be echo'd to stdout.
  90. *
  91. * @param bool $doDelete Pass true to actually remove things
  92. * from the database; false to just check.
  93. * @return bool
  94. */
  95. function checkDupes( $doDelete = false ) {
  96. if ( $this->hasUniqueIndex() ) {
  97. echo wfWikiID() . " already has a unique index on its user table.\n";
  98. return true;
  99. }
  100. $this->lock();
  101. $this->out( "Checking for duplicate accounts...\n" );
  102. $dupes = $this->getDupes();
  103. $count = count( $dupes );
  104. $this->out( "Found $count accounts with duplicate records on " . wfWikiID() . ".\n" );
  105. $this->trimmed = 0;
  106. $this->reassigned = 0;
  107. $this->failed = 0;
  108. foreach ( $dupes as $name ) {
  109. $this->examine( $name, $doDelete );
  110. }
  111. $this->unlock();
  112. $this->out( "\n" );
  113. if ( $this->reassigned > 0 ) {
  114. if ( $doDelete ) {
  115. $this->out( "$this->reassigned duplicate accounts had edits "
  116. . "reassigned to a canonical record id.\n" );
  117. } else {
  118. $this->out( "$this->reassigned duplicate accounts need to have edits reassigned.\n" );
  119. }
  120. }
  121. if ( $this->trimmed > 0 ) {
  122. if ( $doDelete ) {
  123. $this->out( "$this->trimmed duplicate user records were deleted from "
  124. . wfWikiID() . ".\n" );
  125. } else {
  126. $this->out( "$this->trimmed duplicate user accounts were found on "
  127. . wfWikiID() . " which can be removed safely.\n" );
  128. }
  129. }
  130. if ( $this->failed > 0 ) {
  131. $this->out( "Something terribly awry; $this->failed duplicate accounts were not removed.\n" );
  132. return false;
  133. }
  134. if ( $this->trimmed == 0 || $doDelete ) {
  135. $this->out( "It is now safe to apply the unique index on user_name.\n" );
  136. return true;
  137. } else {
  138. $this->out( "Run this script again with the --fix option to automatically delete them.\n" );
  139. return false;
  140. }
  141. }
  142. /**
  143. * We don't want anybody to mess with our stuff...
  144. * @access private
  145. */
  146. function lock() {
  147. $set = [ 'user', 'revision' ];
  148. $names = array_map( [ $this, 'lockTable' ], $set );
  149. $tables = implode( ',', $names );
  150. $this->db->query( "LOCK TABLES $tables", __METHOD__ );
  151. }
  152. function lockTable( $table ) {
  153. return $this->db->tableName( $table ) . ' WRITE';
  154. }
  155. /**
  156. * @access private
  157. */
  158. function unlock() {
  159. $this->db->query( "UNLOCK TABLES", __METHOD__ );
  160. }
  161. /**
  162. * Grab usernames for which multiple records are present in the database.
  163. * @return array
  164. * @access private
  165. */
  166. function getDupes() {
  167. $user = $this->db->tableName( 'user' );
  168. $result = $this->db->query(
  169. "SELECT user_name,COUNT(*) AS n
  170. FROM $user
  171. GROUP BY user_name
  172. HAVING n > 1", __METHOD__ );
  173. $list = [];
  174. foreach ( $result as $row ) {
  175. $list[] = $row->user_name;
  176. }
  177. return $list;
  178. }
  179. /**
  180. * Examine user records for the given name. Try to see which record
  181. * will be the one that actually gets used, then check remaining records
  182. * for edits. If the dupes have no edits, we can safely remove them.
  183. * @param string $name
  184. * @param bool $doDelete
  185. * @access private
  186. */
  187. function examine( $name, $doDelete ) {
  188. $result = $this->db->select( 'user',
  189. [ 'user_id' ],
  190. [ 'user_name' => $name ],
  191. __METHOD__ );
  192. $firstRow = $this->db->fetchObject( $result );
  193. $firstId = $firstRow->user_id;
  194. $this->out( "Record that will be used for '$name' is user_id=$firstId\n" );
  195. foreach ( $result as $row ) {
  196. $dupeId = $row->user_id;
  197. $this->out( "... dupe id $dupeId: " );
  198. $edits = $this->editCount( $dupeId );
  199. if ( $edits > 0 ) {
  200. $this->reassigned++;
  201. $this->out( "has $edits edits! " );
  202. if ( $doDelete ) {
  203. $this->reassignEdits( $dupeId, $firstId );
  204. $newEdits = $this->editCount( $dupeId );
  205. if ( $newEdits == 0 ) {
  206. $this->out( "confirmed cleaned. " );
  207. } else {
  208. $this->failed++;
  209. $this->out( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
  210. continue;
  211. }
  212. } else {
  213. $this->out( "(will need to reassign edits on fix)" );
  214. }
  215. } else {
  216. $this->out( "ok, no edits. " );
  217. }
  218. $this->trimmed++;
  219. if ( $doDelete ) {
  220. $this->trimAccount( $dupeId );
  221. }
  222. $this->out( "\n" );
  223. }
  224. }
  225. /**
  226. * Count the number of edits attributed to this user.
  227. * Does not currently check log table or other things
  228. * where it might show up...
  229. * @param int $userid
  230. * @return int
  231. * @access private
  232. */
  233. function editCount( $userid ) {
  234. return intval( $this->db->selectField(
  235. 'revision',
  236. 'COUNT(*)',
  237. [ 'rev_user' => $userid ],
  238. __METHOD__ ) );
  239. }
  240. /**
  241. * @param int $from
  242. * @param int $to
  243. * @access private
  244. */
  245. function reassignEdits( $from, $to ) {
  246. $this->out( 'reassigning... ' );
  247. $this->db->update( 'revision',
  248. [ 'rev_user' => $to ],
  249. [ 'rev_user' => $from ],
  250. __METHOD__ );
  251. $this->out( "ok. " );
  252. }
  253. /**
  254. * Remove a user account line.
  255. * @param int $userid
  256. * @access private
  257. */
  258. function trimAccount( $userid ) {
  259. $this->out( "deleting..." );
  260. $this->db->delete( 'user', [ 'user_id' => $userid ], __METHOD__ );
  261. $this->out( " ok" );
  262. }
  263. }