populateParentId.php 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. <?php
  2. /**
  3. * Makes the required database updates for rev_parent_id
  4. * to be of any use. It can be used for some simple tracking
  5. * and to find new page edits by users.
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. * http://www.gnu.org/copyleft/gpl.html
  21. *
  22. * @file
  23. * @ingroup Maintenance
  24. */
  25. require_once __DIR__ . '/Maintenance.php';
  /**
   * Maintenance script that makes the required database updates for
   * rev_parent_id to be of any use: every row of the revision table whose
   * rev_parent_id is still NULL gets the ID of the revision that is assumed
   * to have preceded it on the same page (or 0 when there is none).
   *
   * @ingroup Maintenance
   */
  class PopulateParentId extends LoggedUpdateMaintenance {
      public function __construct() {
          parent::__construct();
          $this->addDescription( 'Populates rev_parent_id' );
      }

      /**
       * @return string Key identifying this update
       *  (presumably used by the parent class to record completion; confirm there).
       */
      protected function getUpdateKey() {
          return 'populate rev_parent_id';
      }

      /**
       * @return string Message stating that the column is already populated.
       */
      protected function updateSkippedMessage() {
          return 'rev_parent_id column of revision table already populated.';
      }

      /**
       * Walk the revision table in rev_id windows of $this->mBatchSize rows
       * and fill in rev_parent_id wherever it is NULL.
       *
       * @return bool False if the revision table does not exist, true otherwise
       *  (including when the table is empty).
       */
      protected function doDBUpdates() {
          $db = $this->getDB( DB_MASTER );
          if ( !$db->tableExists( 'revision' ) ) {
              $this->error( "revision table does not exist" );

              return false;
          }

          $this->output( "Populating rev_parent_id column\n" );
          # Use __METHOD__ as the caller name, like every other query in this class.
          $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __METHOD__ );
          $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
          if ( is_null( $start ) || is_null( $end ) ) {
              $this->output( "...revision table seems to be empty, nothing to do.\n" );

              return true;
          }

          # Process the whole [MIN(rev_id), MAX(rev_id)] range window by window.
          $blockStart = intval( $start );
          $blockEnd = $blockStart + $this->mBatchSize - 1;
          $end = intval( $end );
          $count = 0;
          $changed = 0;
          while ( $blockStart <= $end ) {
              $this->output( "...doing rev_id from $blockStart to $blockEnd\n" );
              $cond = "rev_id BETWEEN $blockStart AND $blockEnd";
              $res = $db->select( 'revision',
                  [ 'rev_id', 'rev_page', 'rev_timestamp', 'rev_parent_id' ],
                  [ $cond, 'rev_parent_id' => null ], __METHOD__ );

              # Go through and update rev_parent_id from these rows.
              # Assume that the previous revision of the title was
              # the original previous revision of the title when the
              # edit was made...
              foreach ( $res as $row ) {
                  $previousID = $this->findParentId( $db, $row );

                  # The select above filters on rev_parent_id => null, and the
                  # comparison is deliberately loose: a computed parent of 0
                  # compares equal to null, so only rows that receive a real
                  # parent are counted as changed.
                  if ( $previousID != $row->rev_parent_id ) {
                      $changed++;
                  }

                  # Update the row...
                  $db->update( 'revision',
                      [ 'rev_parent_id' => $previousID ],
                      [ 'rev_id' => $row->rev_id ],
                      __METHOD__ );
                  $count++;
              }

              $blockStart += $this->mBatchSize;
              $blockEnd += $this->mBatchSize;
              # Presumably lets replica databases catch up between windows; confirm.
              wfWaitForSlaves();
          }

          $this->output( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );

          return true;
      }

      /**
       * Work out which revision should be recorded as the parent of $row.
       *
       * @param object $db Database handle as returned by getDB(); must provide
       *  selectField() and addQuotes().
       * @param object $row Revision row with rev_id, rev_page and rev_timestamp.
       * @return int ID of the assumed parent revision, or 0 if none was found.
       */
      private function findParentId( $db, $row ) {
          # First, check rows with the same timestamp other than this one
          # with a smaller rev ID. The highest ID "wins". This avoids loops
          # as timestamp can only decrease and never loops with IDs (from parent to parent)
          $previousID = $db->selectField( 'revision', 'rev_id',
              [
                  'rev_page' => $row->rev_page,
                  'rev_timestamp' => $row->rev_timestamp,
                  "rev_id < " . intval( $row->rev_id )
              ],
              __METHOD__,
              [ 'ORDER BY' => 'rev_id DESC' ] );
          if ( $previousID ) {
              return intval( $previousID );
          }

          # If there are none, check the highest ID with a lower timestamp.
          # Get the highest older timestamp
          $lastTimestamp = $db->selectField(
              'revision',
              'rev_timestamp',
              [
                  'rev_page' => $row->rev_page,
                  "rev_timestamp < " . $db->addQuotes( $row->rev_timestamp )
              ],
              __METHOD__,
              [ 'ORDER BY' => 'rev_timestamp DESC' ]
          );
          if ( !$lastTimestamp ) {
              # No older revision of this page at all.
              return 0;
          }

          # If there is one, let the highest rev ID win
          return intval( $db->selectField( 'revision', 'rev_id',
              [ 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ],
              __METHOD__,
              [ 'ORDER BY' => 'rev_id DESC' ] ) );
      }
  }
  // Name the maintenance class defined in this file, then include the file
  // that the RUN_MAINTENANCE_IF_MAIN constant points to.
  $maintClass = 'PopulateParentId';
  require_once RUN_MAINTENANCE_IF_MAIN;