BacklinkCache.php 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. <?php
  /**
   * Class for fetching backlink lists, approximate backlink counts and partitions.
   * Instances of this class should typically be fetched with $title->getBacklinkCache().
   *
   * Ideally you should only get your backlinks from here when you think there is some
   * advantage in caching them. Otherwise it's just a waste of memory.
   */
  class BacklinkCache {
      /**
       * Partition cache, indexed as [$table][$batchSize]. Each entry is an array with
       * the keys 'numRows' and 'batches', as produced by partitionResult().
       */
      public $partitionCache = array();

      /**
       * Full (unranged) select results from getLinks(), indexed by table name.
       */
      public $fullResultCache = array();

      /**
       * The title whose backlinks are fetched. Must provide getNamespace(),
       * getDBkey() and getPrefixedDBkey().
       */
      public $title;

      /**
       * Database object used for queries; set with setDB() or lazily by getDB().
       */
      public $db;

      /**
       * Expiry passed to $wgMemc->set() for partition entries (presumably seconds).
       */
      const CACHE_EXPIRY = 3600;

      /**
       * Create a new BacklinkCache
       *
       * @param object $title The title to fetch backlinks for
       */
      public function __construct( $title ) {
          $this->title = $title;
      }

      /**
       * Clear locally stored data: both in-process caches and the database object.
       */
      public function clear() {
          $this->partitionCache = array();
          $this->fullResultCache = array();
          // getDB() tests with isset(), so null triggers a fresh lookup on next use.
          $this->db = null;
      }

      /**
       * Set the Database object to use
       *
       * @param object $db Must provide select() as used by getLinks()
       */
      public function setDB( $db ) {
          $this->db = $db;
      }

      /**
       * Get the database object, falling back to wfGetDB( DB_SLAVE ) if none was set.
       *
       * @return object
       */
      protected function getDB() {
          if ( !isset( $this->db ) ) {
              $this->db = wfGetDB( DB_SLAVE );
          }
          return $this->db;
      }

      /**
       * Get the backlinks for a given table. Cached in process memory only.
       *
       * When $startId or $endId is given, only links whose from field lies in that
       * (inclusive) range are fetched, and the result is not cached.
       *
       * @param string $table
       * @param int|bool $startId Lowest from-ID to include, or false for no lower bound
       * @param int|bool $endId Highest from-ID to include, or false for no upper bound
       * @return TitleArray
       * @throws MWException If $table is not a supported links table
       */
      public function getLinks( $table, $startId = false, $endId = false ) {
          // Validate the table before profiling starts, so that an invalid table
          // cannot leave an unbalanced wfProfileIn() behind.
          $fromField = $this->getPrefix( $table ) . '_from';

          wfProfileIn( __METHOD__ );

          $isRange = $startId || $endId;

          if ( !$isRange && isset( $this->fullResultCache[$table] ) ) {
              $ta = TitleArray::newFromResult( $this->fullResultCache[$table] );
              wfProfileOut( __METHOD__ );
              return $ta;
          }

          $conds = $this->getConditions( $table );
          if ( $isRange ) {
              // Partial range, not cached
              wfDebug( __METHOD__.": from DB (uncacheable range)\n" );
              // Use the from field in the condition rather than the joined page_id,
              // because databases are stupid and don't necessarily propagate indexes.
              if ( $startId ) {
                  $conds[] = "$fromField >= " . intval( $startId );
              }
              if ( $endId ) {
                  $conds[] = "$fromField <= " . intval( $endId );
              }
          } else {
              wfDebug( __METHOD__.": from DB\n" );
          }

          // partitionResult() derives ID range boundaries from row positions, so the
          // rows must be ordered by the from field (equal to page_id through the join
          // condition). Without an explicit ORDER BY the order is not guaranteed.
          $res = $this->getDB()->select(
              array( $table, 'page' ),
              array( 'page_namespace', 'page_title', 'page_id' ),
              $conds,
              __METHOD__,
              array( 'STRAIGHT_JOIN', 'ORDER BY' => $fromField ) );

          if ( !$isRange ) {
              $this->fullResultCache[$table] = $res;
          }

          $ta = TitleArray::newFromResult( $res );
          wfProfileOut( __METHOD__ );
          return $ta;
      }

      /**
       * Get the field name prefix for a given table
       *
       * @param string $table
       * @return string
       * @throws MWException If $table is not a supported links table
       */
      protected function getPrefix( $table ) {
          static $prefixes = array(
              'pagelinks' => 'pl',
              'imagelinks' => 'il',
              'categorylinks' => 'cl',
              'templatelinks' => 'tl',
              'redirect' => 'rd',
          );
          if ( isset( $prefixes[$table] ) ) {
              return $prefixes[$table];
          }
          throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
      }

      /**
       * Get the SQL condition array for selecting backlinks, with a join on the page table
       *
       * @param string $table
       * @return array
       * @throws MWException If $table is not a supported links table
       */
      protected function getConditions( $table ) {
          $prefix = $this->getPrefix( $table );
          switch ( $table ) {
              case 'pagelinks':
              case 'templatelinks':
              case 'redirect':
                  // These tables store the target as a namespace/title pair.
                  $conds = array(
                      "{$prefix}_namespace" => $this->title->getNamespace(),
                      "{$prefix}_title" => $this->title->getDBkey(),
                      "page_id={$prefix}_from"
                  );
                  break;
              case 'imagelinks':
                  $conds = array(
                      'il_to' => $this->title->getDBkey(),
                      'page_id=il_from'
                  );
                  break;
              case 'categorylinks':
                  $conds = array(
                      'cl_to' => $this->title->getDBkey(),
                      'page_id=cl_from',
                  );
                  break;
              default:
                  throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
          }
          return $conds;
      }

      /**
       * Get the approximate number of backlinks
       *
       * Uses the cached full result if there is one, then any usable partition cache
       * entry for the table, and otherwise fetches the links with getLinks().
       *
       * @param string $table
       * @return int
       */
      public function getNumLinks( $table ) {
          if ( isset( $this->fullResultCache[$table] ) ) {
              return $this->fullResultCache[$table]->numRows();
          }
          if ( isset( $this->partitionCache[$table] ) ) {
              // Skip anything that is not a complete entry rather than reading
              // 'numRows' from a non-array value.
              foreach ( $this->partitionCache[$table] as $entry ) {
                  if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
                      return $entry['numRows'];
                  }
              }
          }
          $titleArray = $this->getLinks( $table );
          return $titleArray->count();
      }

      /**
       * Partition the backlinks into batches.
       * Returns an array giving the start and end of each range. The first batch has
       * a start of false, and the last batch has an end of false.
       *
       * @param string $table The links table name
       * @param integer $batchSize
       * @return array
       * @throws MWException If $batchSize is not a number of at least 1
       */
      public function partition( $table, $batchSize ) {
          // Reject unusable sizes before doing any cache or database work.
          $this->checkBatchSize( $batchSize );

          // Try cache. Testing the 'batches' key also rejects incomplete entries.
          if ( isset( $this->partitionCache[$table][$batchSize]['batches'] ) ) {
              wfDebug( __METHOD__.": got from partition cache\n" );
              return $this->partitionCache[$table][$batchSize]['batches'];
          }

          // The entry is stored only once it has been computed, so an exception
          // below cannot leave a placeholder behind in the partition cache.

          // Try full result cache
          if ( isset( $this->fullResultCache[$table] ) ) {
              $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
              $this->partitionCache[$table][$batchSize] = $cacheEntry;
              wfDebug( __METHOD__.": got from full result cache\n" );
              return $cacheEntry['batches'];
          }

          // Try memcached
          global $wgMemc;
          $memcKey = wfMemcKey( 'backlinks', md5( $this->title->getPrefixedDBkey() ),
              $table, $batchSize );
          $memcValue = $wgMemc->get( $memcKey );
          if ( is_array( $memcValue ) ) {
              $this->partitionCache[$table][$batchSize] = $memcValue;
              wfDebug( __METHOD__.": got from memcached $memcKey\n" );
              return $memcValue['batches'];
          }

          // Fetch from database; getLinks() fills $this->fullResultCache[$table]
          $this->getLinks( $table );
          $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
          $this->partitionCache[$table][$batchSize] = $cacheEntry;

          // Save to memcached
          $wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );
          wfDebug( __METHOD__.": got from database\n" );
          return $cacheEntry['batches'];
      }

      /**
       * Partition a DB result with backlinks in it into batches
       *
       * The result must be ordered by page_id. Boundaries are taken from the page_id
       * of the row at each batch's first position.
       *
       * @param object $res Result providing numRows(), seek() and fetchObject()
       * @param integer $batchSize
       * @return array Array with 'numRows' and 'batches'; each batch is array( start, end )
       * @throws MWException If $batchSize is invalid or the result is out of order
       */
      protected function partitionResult( $res, $batchSize ) {
          $this->checkBatchSize( $batchSize );

          $numRows = $res->numRows();
          $numBatches = (int)ceil( $numRows / $batchSize );
          $batches = array();

          // Each interior boundary row is fetched once: it ends the current batch
          // (page_id - 1) and starts the next one (page_id).
          $start = false;
          for ( $i = 1; $i <= $numBatches; $i++ ) {
              if ( $i < $numBatches ) {
                  $rowNum = intval( $numRows * $i / $numBatches );
                  $res->seek( $rowNum );
                  $row = $res->fetchObject();
                  $nextStart = $row->page_id;
                  $end = $row->page_id - 1;
              } else {
                  $nextStart = false;
                  $end = false;
              }

              // Sanity check order
              if ( $start && $end && $start > $end ) {
                  throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
              }

              $batches[] = array( $start, $end );
              $start = $nextStart;
          }
          return array( 'numRows' => $numRows, 'batches' => $batches );
      }

      /**
       * Ensure a batch size can be used for partitioning.
       *
       * Sizes below 1 would either divide by zero or produce more batches than rows.
       *
       * @param mixed $batchSize
       * @throws MWException If $batchSize is not a number of at least 1
       */
      protected function checkBatchSize( $batchSize ) {
          if ( !is_numeric( $batchSize ) || $batchSize < 1 ) {
              throw new MWException( 'Batch size must be a number of at least 1 in ' . __CLASS__ );
          }
      }
  }