updateSearchIndex.php 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. <?php
  2. /**
  3. * Periodic off-peak updating of the search index.
  4. *
  5. * Usage: php updateSearchIndex.php [-s START] [-e END] [-p POSFILE] [-l LOCKTIME] [-q]
  6. * Where START is the starting timestamp
  7. * END is the ending timestamp
  8. * POSFILE is a file to load timestamps from and save them to, searchUpdate.WIKI_ID.pos by default
  9. * LOCKTIME is how long the searchindex and revision tables will be locked for
  10. * -q means quiet
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2 of the License, or
  15. * (at your option) any later version.
  16. *
  17. * This program is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20. * GNU General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU General Public License along
  23. * with this program; if not, write to the Free Software Foundation, Inc.,
  24. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  25. * http://www.gnu.org/copyleft/gpl.html
  26. *
  27. * @file
  28. * @ingroup Maintenance
  29. */
  30. require_once __DIR__ . '/Maintenance.php';
  /**
   * Maintenance script for periodic off-peak updating of the search index.
   *
   * Pages touched in recentchanges between a start and an end timestamp are
   * re-indexed. The start comes from -s, else from a saved position file, else
   * defaults to 24 hours ago. The end (-e, default: now) is written back to the
   * position file after the update so the next run can resume from it.
   *
   * @ingroup Maintenance
   */
  class UpdateSearchIndex extends Maintenance {

      /**
       * Register the command line options understood by this script.
       */
      public function __construct() {
          parent::__construct();
          $this->addDescription( 'Script for periodic off-peak updating of the search index' );
          $this->addOption( 's', 'starting timestamp', false, true );
          $this->addOption( 'e', 'Ending timestamp', false, true );
          $this->addOption(
              'p',
              'File for saving/loading timestamps, searchUpdate.WIKI_ID.pos by default',
              false,
              true
          );
          $this->addOption(
              'l',
              'How long the searchindex and revision tables will be locked for',
              false,
              true
          );
      }

      /**
       * @return int Always Maintenance::DB_ADMIN
       */
      public function getDbType() {
          return Maintenance::DB_ADMIN;
      }

      /**
       * Work out the time window, run the index update, then persist the end
       * of the window to the position file.
       */
      public function execute() {
          $posFile = $this->getOption( 'p', 'searchUpdate.' . wfWikiID() . '.pos' );
          $end = $this->getOption( 'e', wfTimestampNow() );
          $legacyPosFile = 'searchUpdate.pos';

          $start = null;
          if ( $this->hasOption( 's' ) ) {
              $start = $this->getOption( 's' );
          } elseif ( is_readable( $legacyPosFile ) ) {
              # B/c to the old position file name which was hardcoded.
              # It is removed once read; the new-style file takes over below.
              $start = $this->readPositionFile( $legacyPosFile );
              unlink( $legacyPosFile );
          } elseif ( is_readable( $posFile ) ) {
              $start = $this->readPositionFile( $posFile );
          }
          if ( $start === null ) {
              # No usable saved position: default to the last 24 hours.
              $start = wfTimestamp( TS_MW, time() - 86400 );
          }

          $lockTime = $this->getOption( 'l', 20 );

          $this->doUpdateSearchIndex( $start, $end, $lockTime );

          if ( !$this->writePositionFile( $posFile, $end ) ) {
              $this->error( "*** Couldn't write to the $posFile!\n" );
          }
      }

      /**
       * Read a saved timestamp from a position file.
       *
       * @param string $path Path of the position file
       * @return string|null Trimmed file contents, or null when the file
       *  could not be read or holds nothing but whitespace
       */
      private function readPositionFile( $path ) {
          $contents = file_get_contents( $path );
          if ( $contents === false ) {
              return null;
          }
          $contents = trim( $contents );

          return $contents === '' ? null : $contents;
      }

      /**
       * Save a timestamp to the position file.
       *
       * @param string $path Path of the position file
       * @param string $timestamp Value to store
       * @return bool Whether the value was written
       */
      private function writePositionFile( $path, $timestamp ) {
          # realpath() yields false for a file that does not exist yet, which
          # made the very first run unable to create its position file. Check
          # the file itself when present and its directory otherwise.
          $writable = file_exists( $path )
              ? is_writable( $path )
              : is_writable( dirname( $path ) );
          if ( !$writable ) {
              return false;
          }

          return file_put_contents( $path, $timestamp ) !== false;
      }

      /**
       * Re-index every page whose latest revision has a non-log recentchanges
       * entry between the two timestamps.
       *
       * @param string $start Start of the window, any format $dbw->timestamp() accepts
       * @param string $end End of the window, same format rules as $start
       * @param int|string $maxLockTime Passed through to updateSearchIndex()
       *  (inherited helper, not visible in this file)
       */
      private function doUpdateSearchIndex( $start, $end, $maxLockTime ) {
          global $wgDisableSearchUpdate;

          # Force search updates on for the duration of this script.
          $wgDisableSearchUpdate = false;

          $dbw = $this->getDB( DB_MASTER );

          $this->output( "Updating searchindex between $start and $end\n" );

          # Select entries from recentchanges which are on top and between the specified times
          $start = $dbw->timestamp( $start );
          $end = $dbw->timestamp( $end );

          # Built through the query builder so that table names are resolved
          # and values are quoted by the database layer, not by hand.
          $res = $dbw->select(
              [ 'recentchanges', 'page' ],
              'rc_cur_id',
              [
                  'rc_type != ' . $dbw->addQuotes( RC_LOG ),
                  'rc_timestamp BETWEEN ' . $dbw->addQuotes( $start ) .
                      ' AND ' . $dbw->addQuotes( $end )
              ],
              __METHOD__,
              [],
              [
                  'page' => [ 'JOIN', 'rc_cur_id=page_id AND rc_this_oldid=page_latest' ]
              ]
          );

          $this->updateSearchIndex( $maxLockTime, [ $this, 'searchIndexUpdateCallback' ], $dbw, $res );

          $this->output( "Done\n" );
      }

      /**
       * Per-row callback handed to updateSearchIndex(): re-index one page.
       *
       * @param object $dbw Database handle supplied by the caller
       * @param object $row Result row; only rc_cur_id is read
       */
      public function searchIndexUpdateCallback( $dbw, $row ) {
          $this->updateSearchIndexForPage( $dbw, $row->rc_cur_id );
      }
  }
  # Name the maintenance class declared in this file, then include the file
  # that RUN_MAINTENANCE_IF_MAIN points to (defined outside this file).
  $maintClass = UpdateSearchIndex::class;
  require_once RUN_MAINTENANCE_IF_MAIN;