HTMLCacheUpdate.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. <?php
  2. /**
  3. * Class to invalidate the HTML cache of all the pages linking to a given title.
  4. * Small numbers of links will be done immediately, large numbers are pushed onto
  5. * the job queue.
  6. *
  7. * This class is designed to work efficiently with small numbers of links, and
  8. * to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory
  9. * and time requirements of loading all backlinked IDs in doUpdate() might become
  10. * prohibitive. The requirements measured at Wikimedia are approximately:
  11. *
  12. * memory: 48 bytes per row
  13. * time: 16us per row for the query plus processing
  14. *
  15. * The reason this query is done is to support partitioning of the job
  16. * by backlinked ID. The memory issue could be alleviated by doing this query in
  17. * batches, but of course LIMIT with an offset is inefficient on the DB side.
  18. *
  19. * The class is nevertheless a vast improvement on the previous method of using
  20. * Image::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per
  21. * link.
  22. *
  23. * @ingroup Cache
  24. */
  class HTMLCacheUpdate
  {
      /**
       * $mTitle is the title being linked to and $mTable the link table name
       * handed to the backlink cache. $mPrefix is not used by this class; it
       * is kept only so existing external accesses do not break.
       */
      public $mTitle, $mTable, $mPrefix;

      /**
       * $mRowsPerJob is the threshold above which work is deferred to the job
       * queue (and the partition size used there); $mRowsPerQuery caps how many
       * page IDs go into a single UPDATE statement in invalidate().
       */
      public $mRowsPerJob, $mRowsPerQuery;

      /**
       * Backlink cache object returned by $mTitle->getBacklinkCache().
       * Declared explicitly so it is not created as a dynamic property.
       */
      public $mCache;

      /**
       * @param Title $titleTo The title whose backlinking pages are invalidated
       * @param string $table Link table name passed through to the backlink cache
       */
      public function __construct( $titleTo, $table ) {
          global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

          $this->mTitle = $titleTo;
          $this->mTable = $table;
          $this->mRowsPerJob = $wgUpdateRowsPerJob;
          $this->mRowsPerQuery = $wgUpdateRowsPerQuery;
          $this->mCache = $this->mTitle->getBacklinkCache();
      }

      /**
       * Invalidate the pages linking to the title: immediately when the number
       * of links is at most $mRowsPerJob, otherwise by queueing jobs.
       * The 'HTMLCacheUpdate::doUpdate' hook runs in every case, including
       * when there are no links at all.
       */
      public function doUpdate() {
          # Fetch the number of links to decide between inline and queued work
          $numRows = $this->mCache->getNumLinks( $this->mTable );

          if ( $numRows != 0 ) {
              if ( $numRows > $this->mRowsPerJob ) {
                  $this->insertJobs();
              } else {
                  $this->invalidate();
              }
          }

          wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) );
      }

      /**
       * Queue one HTMLCacheUpdateJob per partition reported by the backlink
       * cache. Each partition is used as a (start, end) pair.
       */
      protected function insertJobs() {
          $batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
          if ( !$batches ) {
              return;
          }

          # Initialise explicitly rather than relying on implicit array creation
          $jobs = array();
          foreach ( $batches as $batch ) {
              $params = array(
                  'table' => $this->mTable,
                  'start' => $batch[0],
                  'end' => $batch[1],
              );
              $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
          }

          Job::batchInsert( $jobs );
      }

      /**
       * Invalidate a set of pages, right now.
       *
       * Touches page_touched for every linking page, then purges squid and
       * the file cache when those features are enabled.
       *
       * @param mixed $startId Lower bound forwarded to the backlink cache, or false
       * @param mixed $endId Upper bound forwarded to the backlink cache, or false
       */
      public function invalidate( $startId = false, $endId = false ) {
          global $wgUseFileCache, $wgUseSquid;

          $titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId );
          if ( $titleArray->count() == 0 ) {
              return;
          }

          $dbw = wfGetDB( DB_MASTER );
          # One timestamp for the whole run so every batch writes the same value
          $timestamp = $dbw->timestamp();

          # Get all IDs in this query into an array
          $ids = array();
          foreach ( $titleArray as $title ) {
              $ids[] = $title->getArticleID();
          }

          # Update page_touched, at most $mRowsPerQuery rows per statement so a
          # heavily linked title does not produce one enormous IN (...) list.
          # A non-positive setting keeps the old single-query behaviour.
          $rowsPerQuery = (int)$this->mRowsPerQuery;
          $idBatches = $rowsPerQuery > 0
              ? array_chunk( $ids, $rowsPerQuery )
              : array( $ids );
          foreach ( $idBatches as $idBatch ) {
              $dbw->update( 'page',
                  array( 'page_touched' => $timestamp ),
                  array( 'page_id IN (' . $dbw->makeList( $idBatch ) . ')' ),
                  __METHOD__
              );
          }

          # Update squid
          if ( $wgUseSquid ) {
              $u = SquidUpdate::newFromTitles( $titleArray );
              $u->doUpdate();
          }

          # Update file cache
          if ( $wgUseFileCache ) {
              foreach ( $titleArray as $title ) {
                  HTMLFileCache::clearFileCache( $title );
              }
          }
      }
  }
  /**
   * Job queue entry that runs HTMLCacheUpdate::invalidate() for one
   * start/end slice of the pages linking to a title.
   *
   * @ingroup JobQueue
   */
  class HTMLCacheUpdateJob extends Job {
      /** Link table name, copied from the job parameters */
      public $table;
      /** Start of the page_id range, copied from the job parameters */
      public $start;
      /** End of the page_id range, copied from the job parameters */
      public $end;

      /**
       * Construct a job
       * @param Title $title The title linked to
       * @param array $params Job parameters (table, start and end page_ids)
       * @param integer $id job_id
       */
      public function __construct( $title, $params, $id = 0 ) {
          parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

          # Mirror the three parameters onto same-named properties
          foreach ( array( 'table', 'start', 'end' ) as $field ) {
              $this->$field = $params[$field];
          }
      }

      /**
       * Invalidate the slice of linking pages described by this job.
       * @return bool Always true
       */
      public function run() {
          $cacheUpdate = new HTMLCacheUpdate( $this->title, $this->table );
          $cacheUpdate->invalidate( $this->start, $this->end );

          return true;
      }
  }
  124. }