Categoryfinder.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. <?php
  /**
   * The "Categoryfinder" class takes a list of articles, creates an internal
   * representation of all their parent categories (as well as parents of
   * parents etc.). From this representation, it determines which of these
   * articles are in one or all of a given subset of categories.
   *
   * Example use:
   * <code>
   * # Determines whether the article with the page_id 12345 is in both
   * # "Category 1" and "Category 2" or their subcategories, respectively
   *
   * $cf = new Categoryfinder ;
   * $cf->seed (
   *     array ( 12345 ) ,
   *     array ( "Category 1","Category 2" ) ,
   *     "AND"
   * ) ;
   * $a = $cf->run() ;
   * print implode ( "," , $a ) ;
   * </code>
   *
   */
  class Categoryfinder {
      /** Article IDs exactly as passed to seed(); run() filters this list. */
      public $articles = array();
      /** DBKEY category names (name => name) for categories that have no page row. */
      public $deadend = array();
      /** Parent map: page ID => array( cl_to => categorylinks row object ). */
      public $parents = array();
      /** Article/category page IDs whose parents the next scan_next_layer() call fetches. */
      public $next = array();
      /** Target categories in DBKEY form (name => name). */
      public $targets = array();
      /** Category DBKEY name => page ID, filled for categories that have a page row. */
      public $name2id = array();
      /** Matching mode: "AND" or "OR". */
      public $mode;
      /** Database handle; assigned in run() from wfGetDB( DB_SLAVE ). */
      public $dbr;

      /**
       * Constructor (currently empty).
       */
      public function __construct() {
      }

      /**
       * Initializes the instance. Do this prior to calling run().
       *
       * @param $article_ids Array of article IDs
       * @param $categories Array of category names; each one is converted to
       *   DBKEY form through Title::makeTitleSafe( NS_CATEGORY, ... ), and names
       *   for which that call returns a falsy value are skipped
       * @param $mode String: "OR" to accept an article that is in at least one
       *   target category; any other value is handled as "AND" (all targets
       *   must be matched). Default 'AND'.
       */
      public function seed( $article_ids, $categories, $mode = "AND" ) {
          $this->articles = $article_ids;
          $this->next = $article_ids;
          $this->mode = $mode;

          # Set the list of target categories; convert them to DBKEY form first
          $this->targets = array();
          foreach ( $categories as $c ) {
              $ct = Title::makeTitleSafe( NS_CATEGORY, $c );
              if ( $ct ) {
                  $c = $ct->getDBkey();
                  $this->targets[$c] = $c;
              }
          }
      }

      /**
       * Iterates through the parent tree starting with the seed values,
       * then checks the articles if they match the conditions.
       *
       * @return array of page_ids (those given to seed() that match the conditions)
       */
      public function run() {
          $this->dbr = wfGetDB( DB_SLAVE );

          # Keep fetching parent layers until no unseen category pages remain
          while ( count( $this->next ) > 0 ) {
              $this->scan_next_layer();
          }

          # Now check if this applies to the individual articles
          $ret = array();
          foreach ( $this->articles as $article ) {
              # check() consumes its condition list by reference, so every
              # article gets its own fresh copy of the targets
              $conds = $this->targets;
              if ( $this->check( $article, $conds ) ) {
                  # Matches the conditions
                  $ret[] = $article;
              }
          }
          return $ret;
      }

      /**
       * This function recurses through the parent representation, trying to
       * match the conditions.
       *
       * @param $id The article/category to check
       * @param $conds The array of categories to match; passed by reference,
       *   matched entries are removed (it is emptied on success in "OR" mode)
       * @param $path used to check for recursion loops
       * @return bool Does this match the conditions?
       */
      public function check( $id, &$conds, $path = array() ) {
          // Check for loops and stop!
          if ( in_array( $id, $path ) ) {
              return false;
          }
          $path[] = $id;

          # Shortcut (runtime paranoia): No conditions=all matched
          if ( count( $conds ) == 0 ) {
              return true;
          }

          if ( !isset( $this->parents[$id] ) ) {
              return false;
          }

          # iterate through the parents
          foreach ( $this->parents[$id] as $p ) {
              $pname = $p->cl_to;

              # Is this a condition?
              if ( isset( $conds[$pname] ) ) {
                  # This key is in the category list!
                  if ( $this->mode == "OR" ) {
                      # One found, that's enough!
                      $conds = array();
                      return true;
                  } else {
                      # Assuming "AND" as default
                      unset( $conds[$pname] );
                      if ( count( $conds ) == 0 ) {
                          # All conditions met, done
                          return true;
                      }
                  }
              }

              # Not done yet, try sub-parents
              if ( !isset( $this->name2id[$pname] ) ) {
                  # No sub-parent
                  continue;
              }
              $done = $this->check( $this->name2id[$pname], $conds, $path );
              if ( $done || count( $conds ) == 0 ) {
                  # Subparents have done it!
                  return true;
              }
          }
          return false;
      }

      /**
       * Scans a "parent layer" of the articles/categories in $this->next.
       *
       * Records every categorylinks row of the current layer in $this->parents,
       * then replaces $this->next with the page IDs of the newly seen
       * categories that have a page row; the ones without a row are remembered
       * in $this->deadend.
       */
      public function scan_next_layer() {
          # Find all parents of the article currently in $this->next
          $layer = array();
          $res = $this->dbr->select(
              /* FROM   */ 'categorylinks',
              /* SELECT */ '*',
              /* WHERE  */ array( 'cl_from' => $this->next ),
              __METHOD__ . "-1"
          );
          while ( $o = $this->dbr->fetchObject( $res ) ) {
              $k = $o->cl_to;

              # Update parent tree
              if ( !isset( $this->parents[$o->cl_from] ) ) {
                  $this->parents[$o->cl_from] = array();
              }
              $this->parents[$o->cl_from][$k] = $o;

              # Ignore those we already have. $this->deadend is keyed by name,
              # so look the key up directly: a loose in_array() over the values
              # treats numeric-looking names such as "1e3" and "1000" as equal
              # and would wrongly skip a category that was never looked up.
              if ( isset( $this->deadend[$k] ) ) {
                  continue;
              }
              if ( isset( $this->name2id[$k] ) ) {
                  continue;
              }

              # Hey, new category!
              $layer[$k] = $k;
          }
          $this->dbr->freeResult( $res );

          $this->next = array();

          # Find the IDs of all category pages in $layer, if they exist
          if ( count( $layer ) > 0 ) {
              $res = $this->dbr->select(
                  /* FROM   */ 'page',
                  /* SELECT */ 'page_id,page_title',
                  /* WHERE  */ array( 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ),
                  __METHOD__ . "-2"
              );
              while ( $o = $this->dbr->fetchObject( $res ) ) {
                  $id = $o->page_id;
                  $name = $o->page_title;
                  $this->name2id[$name] = $id;
                  $this->next[] = $id;
                  # Found a page for it, so it is not a dead end
                  unset( $layer[$name] );
              }
              $this->dbr->freeResult( $res );
          }

          # Mark dead ends: whatever is left in $layer has no category page
          foreach ( $layer as $v ) {
              $this->deadend[$v] = $v;
          }
      }
  } # END OF CLASS "Categoryfinder"