populateRevisionSha1.php 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. <?php
  2. /**
  3. * Fills the rev_sha1 and ar_sha1 columns of revision
  4. * and archive tables for revisions created before MW 1.19.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with this program; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. * http://www.gnu.org/copyleft/gpl.html
  20. *
  21. * @file
  22. * @ingroup Maintenance
  23. */
  24. require_once __DIR__ . '/Maintenance.php';
  25. /**
  26. * Maintenance script that fills the rev_sha1 and ar_sha1 columns of revision
  27. * and archive tables for revisions created before MW 1.19.
  28. *
  29. * @ingroup Maintenance
  30. */
  31. class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
  32. public function __construct() {
  33. parent::__construct();
  34. $this->addDescription( 'Populates the rev_sha1 and ar_sha1 fields' );
  35. $this->setBatchSize( 200 );
  36. }
  37. protected function getUpdateKey() {
  38. return 'populate rev_sha1';
  39. }
  40. protected function doDBUpdates() {
  41. $db = $this->getDB( DB_MASTER );
  42. if ( !$db->tableExists( 'revision' ) ) {
  43. $this->error( "revision table does not exist", true );
  44. } elseif ( !$db->tableExists( 'archive' ) ) {
  45. $this->error( "archive table does not exist", true );
  46. } elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
  47. $this->output( "rev_sha1 column does not exist\n\n", true );
  48. return false;
  49. }
  50. $this->output( "Populating rev_sha1 column\n" );
  51. $rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );
  52. $this->output( "Populating ar_sha1 column\n" );
  53. $ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
  54. $this->output( "Populating ar_sha1 column legacy rows\n" );
  55. $ac += $this->doSha1LegacyUpdates();
  56. $this->output( "rev_sha1 and ar_sha1 population complete "
  57. . "[$rc revision rows, $ac archive rows].\n" );
  58. return true;
  59. }
  60. /**
  61. * @param string $table
  62. * @param string $idCol
  63. * @param string $prefix
  64. * @return int Rows changed
  65. */
  66. protected function doSha1Updates( $table, $idCol, $prefix ) {
  67. $db = $this->getDB( DB_MASTER );
  68. $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
  69. $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
  70. if ( !$start || !$end ) {
  71. $this->output( "...$table table seems to be empty.\n" );
  72. return 0;
  73. }
  74. $count = 0;
  75. # Do remaining chunk
  76. $end += $this->mBatchSize - 1;
  77. $blockStart = $start;
  78. $blockEnd = $start + $this->mBatchSize - 1;
  79. while ( $blockEnd <= $end ) {
  80. $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
  81. $cond = "$idCol BETWEEN $blockStart AND $blockEnd
  82. AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
  83. $res = $db->select( $table, '*', $cond, __METHOD__ );
  84. $this->beginTransaction( $db, __METHOD__ );
  85. foreach ( $res as $row ) {
  86. if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
  87. $count++;
  88. }
  89. }
  90. $this->commitTransaction( $db, __METHOD__ );
  91. $blockStart += $this->mBatchSize;
  92. $blockEnd += $this->mBatchSize;
  93. wfWaitForSlaves();
  94. }
  95. return $count;
  96. }
  97. /**
  98. * @return int
  99. */
  100. protected function doSha1LegacyUpdates() {
  101. $count = 0;
  102. $db = $this->getDB( DB_MASTER );
  103. $res = $db->select( 'archive', '*',
  104. [ 'ar_rev_id IS NULL', 'ar_sha1' => '' ], __METHOD__ );
  105. $updateSize = 0;
  106. $this->beginTransaction( $db, __METHOD__ );
  107. foreach ( $res as $row ) {
  108. if ( $this->upgradeLegacyArchiveRow( $row ) ) {
  109. ++$count;
  110. }
  111. if ( ++$updateSize >= 100 ) {
  112. $updateSize = 0;
  113. $this->commitTransaction( $db, __METHOD__ );
  114. $this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
  115. wfWaitForSlaves();
  116. $this->beginTransaction( $db, __METHOD__ );
  117. }
  118. }
  119. $this->commitTransaction( $db, __METHOD__ );
  120. return $count;
  121. }
  122. /**
  123. * @param stdClass $row
  124. * @param string $table
  125. * @param string $idCol
  126. * @param string $prefix
  127. * @return bool
  128. */
  129. protected function upgradeRow( $row, $table, $idCol, $prefix ) {
  130. $db = $this->getDB( DB_MASTER );
  131. try {
  132. $rev = ( $table === 'archive' )
  133. ? Revision::newFromArchiveRow( $row )
  134. : new Revision( $row );
  135. $text = $rev->getSerializedData();
  136. } catch ( Exception $e ) {
  137. $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
  138. return false; // bug 22624?
  139. }
  140. if ( !is_string( $text ) ) {
  141. # This should not happen, but sometimes does (bug 20757)
  142. $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
  143. return false;
  144. } else {
  145. $db->update( $table,
  146. [ "{$prefix}_sha1" => Revision::base36Sha1( $text ) ],
  147. [ $idCol => $row->$idCol ],
  148. __METHOD__
  149. );
  150. return true;
  151. }
  152. }
  153. /**
  154. * @param stdClass $row
  155. * @return bool
  156. */
  157. protected function upgradeLegacyArchiveRow( $row ) {
  158. $db = $this->getDB( DB_MASTER );
  159. try {
  160. $rev = Revision::newFromArchiveRow( $row );
  161. } catch ( Exception $e ) {
  162. $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
  163. return false; // bug 22624?
  164. }
  165. $text = $rev->getSerializedData();
  166. if ( !is_string( $text ) ) {
  167. # This should not happen, but sometimes does (bug 20757)
  168. $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
  169. return false;
  170. } else {
  171. # Archive table as no PK, but (NS,title,time) should be near unique.
  172. # Any duplicates on those should also have duplicated text anyway.
  173. $db->update( 'archive',
  174. [ 'ar_sha1' => Revision::base36Sha1( $text ) ],
  175. [
  176. 'ar_namespace' => $row->ar_namespace,
  177. 'ar_title' => $row->ar_title,
  178. 'ar_timestamp' => $row->ar_timestamp,
  179. 'ar_len' => $row->ar_len // extra sanity
  180. ],
  181. __METHOD__
  182. );
  183. return true;
  184. }
  185. }
  186. }
  187. $maintClass = "PopulateRevisionSha1";
  188. require_once RUN_MAINTENANCE_IF_MAIN;