convertLinks.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. <?php
  2. /**
  3. * Convert from the old links schema (string->ID) to the new schema (ID->ID).
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Maintenance
  22. */
  23. require_once __DIR__ . '/Maintenance.php';
  24. /**
  25. * Maintenance script to convert from the old links schema (string->ID)
  26. * to the new schema (ID->ID).
  27. *
  28. * The wiki should be put into read-only mode while this script executes.
  29. *
  30. * @ingroup Maintenance
  31. */
  32. class ConvertLinks extends Maintenance {
  33. private $logPerformance;
  34. public function __construct() {
  35. parent::__construct();
  36. $this->addDescription(
  37. 'Convert from the old links schema (string->ID) to the new schema (ID->ID). '
  38. . 'The wiki should be put into read-only mode while this script executes' );
  39. $this->addArg( 'logperformance', "Log performance to perfLogFilename.", false );
  40. $this->addArg(
  41. 'perfLogFilename',
  42. "Filename where performance is logged if --logperformance was set "
  43. . "(defaults to 'convLinksPerf.txt').",
  44. false
  45. );
  46. $this->addArg(
  47. 'keep-links-table',
  48. "Don't overwrite the old links table with the new one, leave the new table at links_temp.",
  49. false
  50. );
  51. $this->addArg(
  52. 'nokeys',
  53. /* (What about InnoDB?) */
  54. "Don't create keys, and so allow duplicates in the new links table.\n"
  55. . "This gives a huge speed improvement for very large links tables which are MyISAM.",
  56. false
  57. );
  58. }
  59. public function getDbType() {
  60. return Maintenance::DB_ADMIN;
  61. }
  62. public function execute() {
  63. $dbw = $this->getDB( DB_MASTER );
  64. $type = $dbw->getType();
  65. if ( $type != 'mysql' ) {
  66. $this->output( "Link table conversion not necessary for $type\n" );
  67. return;
  68. }
  69. global $wgContLang;
  70. # counters etc
  71. $numBadLinks = $curRowsRead = 0;
  72. # total tuples INSERTed into links_temp
  73. $totalTuplesInserted = 0;
  74. # whether or not to give progress reports while reading IDs from cur table
  75. $reportCurReadProgress = true;
  76. # number of rows between progress reports
  77. $curReadReportInterval = 1000;
  78. # whether or not to give progress reports during conversion
  79. $reportLinksConvProgress = true;
  80. # number of rows per INSERT
  81. $linksConvInsertInterval = 1000;
  82. $initialRowOffset = 0;
  83. # not used yet; highest row number from links table to process
  84. # $finalRowOffset = 0;
  85. $overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
  86. $noKeys = $this->hasOption( 'noKeys' );
  87. $this->logPerformance = $this->hasOption( 'logperformance' );
  88. $perfLogFilename = $this->getArg( 'perfLogFilename', "convLinksPerf.txt" );
  89. # --------------------------------------------------------------------
  90. list( $cur, $links, $links_temp, $links_backup ) =
  91. $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );
  92. if ( $dbw->tableExists( 'pagelinks' ) ) {
  93. $this->output( "...have pagelinks; skipping old links table updates\n" );
  94. return;
  95. }
  96. $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
  97. if ( $dbw->fieldType( $res, 0 ) == "int" ) {
  98. $this->output( "Schema already converted\n" );
  99. return;
  100. }
  101. $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
  102. $row = $dbw->fetchObject( $res );
  103. $numRows = $row->count;
  104. $dbw->freeResult( $res );
  105. if ( $numRows == 0 ) {
  106. $this->output( "Updating schema (no rows to convert)...\n" );
  107. $this->createTempTable();
  108. } else {
  109. $fh = false;
  110. if ( $this->logPerformance ) {
  111. $fh = fopen( $perfLogFilename, "w" );
  112. if ( !$fh ) {
  113. $this->error( "Couldn't open $perfLogFilename" );
  114. $this->logPerformance = false;
  115. }
  116. }
  117. $baseTime = $startTime = microtime( true );
  118. # Create a title -> cur_id map
  119. $this->output( "Loading IDs from $cur table...\n" );
  120. $this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
  121. $this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );
  122. $dbw->bufferResults( false );
  123. $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
  124. $ids = [];
  125. foreach ( $res as $row ) {
  126. $title = $row->cur_title;
  127. if ( $row->cur_namespace ) {
  128. $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
  129. }
  130. $ids[$title] = $row->cur_id;
  131. $curRowsRead++;
  132. if ( $reportCurReadProgress ) {
  133. if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) {
  134. $this->performanceLog(
  135. $fh,
  136. $curRowsRead . " " . ( microtime( true ) - $baseTime ) . "\n"
  137. );
  138. $this->output( "\t$curRowsRead rows of $cur table read.\n" );
  139. }
  140. }
  141. }
  142. $dbw->freeResult( $res );
  143. $dbw->bufferResults( true );
  144. $this->output( "Finished loading IDs.\n\n" );
  145. $this->performanceLog(
  146. $fh,
  147. "Took " . ( microtime( true ) - $baseTime ) . " seconds to load IDs.\n\n"
  148. );
  149. # --------------------------------------------------------------------
  150. # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
  151. # convert, and write to the new table.
  152. $this->createTempTable();
  153. $this->performanceLog( $fh, "Resetting timer.\n\n" );
  154. $baseTime = microtime( true );
  155. $this->output( "Processing $numRows rows from $links table...\n" );
  156. $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
  157. $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );
  158. for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows;
  159. $rowOffset += $linksConvInsertInterval
  160. ) {
  161. $sqlRead = "SELECT * FROM $links ";
  162. $sqlRead = $dbw->limitResult( $sqlRead, $linksConvInsertInterval, $rowOffset );
  163. $res = $dbw->query( $sqlRead );
  164. if ( $noKeys ) {
  165. $sqlWrite = [ "INSERT INTO $links_temp (l_from,l_to) VALUES " ];
  166. } else {
  167. $sqlWrite = [ "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES " ];
  168. }
  169. $tuplesAdded = 0; # no tuples added to INSERT yet
  170. foreach ( $res as $row ) {
  171. $fromTitle = $row->l_from;
  172. if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
  173. $from = $ids[$fromTitle];
  174. $to = $row->l_to;
  175. if ( $tuplesAdded != 0 ) {
  176. $sqlWrite[] = ",";
  177. }
  178. $sqlWrite[] = "($from,$to)";
  179. $tuplesAdded++;
  180. } else { # invalid title
  181. $numBadLinks++;
  182. }
  183. }
  184. $dbw->freeResult( $res );
  185. # $this->output( "rowOffset: $rowOffset\ttuplesAdded: "
  186. # . "$tuplesAdded\tnumBadLinks: $numBadLinks\n" );
  187. if ( $tuplesAdded != 0 ) {
  188. if ( $reportLinksConvProgress ) {
  189. $this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
  190. }
  191. $dbw->query( implode( "", $sqlWrite ) );
  192. $totalTuplesInserted += $tuplesAdded;
  193. if ( $reportLinksConvProgress ) {
  194. $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
  195. $this->performanceLog(
  196. $fh,
  197. $totalTuplesInserted . " " . ( microtime( true ) - $baseTime ) . "\n"
  198. );
  199. }
  200. }
  201. }
  202. $this->output( "$totalTuplesInserted valid titles and "
  203. . "$numBadLinks invalid titles were processed.\n\n" );
  204. $this->performanceLog(
  205. $fh,
  206. "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
  207. );
  208. $this->performanceLog(
  209. $fh,
  210. "Total execution time: " . ( microtime( true ) - $startTime ) . " seconds.\n"
  211. );
  212. if ( $this->logPerformance ) {
  213. fclose( $fh );
  214. }
  215. }
  216. # --------------------------------------------------------------------
  217. if ( $overwriteLinksTable ) {
  218. # Check for existing links_backup, and delete it if it exists.
  219. $this->output( "Dropping backup links table if it exists..." );
  220. $dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
  221. $this->output( " done.\n" );
  222. # Swap in the new table, and move old links table to links_backup
  223. $this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
  224. $dbw->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", __METHOD__ );
  225. $this->output( " done.\n\n" );
  226. $this->output( "Conversion complete. The old table remains at $links_backup;\n" );
  227. $this->output( "delete at your leisure.\n" );
  228. } else {
  229. $this->output( "Conversion complete. The converted table is at $links_temp;\n" );
  230. $this->output( "the original links table is unchanged.\n" );
  231. }
  232. }
  233. private function createTempTable() {
  234. $dbConn = $this->getDB( DB_MASTER );
  235. if ( !( $dbConn->isOpen() ) ) {
  236. $this->output( "Opening connection to database failed.\n" );
  237. return;
  238. }
  239. $links_temp = $dbConn->tableName( 'links_temp' );
  240. $this->output( "Dropping temporary links table if it exists..." );
  241. $dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
  242. $this->output( " done.\n" );
  243. $this->output( "Creating temporary links table..." );
  244. if ( $this->hasOption( 'noKeys' ) ) {
  245. $dbConn->query( "CREATE TABLE $links_temp ( " .
  246. "l_from int(8) unsigned NOT NULL default '0', " .
  247. "l_to int(8) unsigned NOT NULL default '0')" );
  248. } else {
  249. $dbConn->query( "CREATE TABLE $links_temp ( " .
  250. "l_from int(8) unsigned NOT NULL default '0', " .
  251. "l_to int(8) unsigned NOT NULL default '0', " .
  252. "UNIQUE KEY l_from(l_from,l_to), " .
  253. "KEY (l_to))" );
  254. }
  255. $this->output( " done.\n\n" );
  256. }
  257. private function performanceLog( $fh, $text ) {
  258. if ( $this->logPerformance ) {
  259. fwrite( $fh, $text );
  260. }
  261. }
  262. }
  263. $maintClass = "ConvertLinks";
  264. require_once RUN_MAINTENANCE_IF_MAIN;