123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199 |
- <?php
- /**
- * The "Categoryfinder" class takes a list of articles, creates an internal
- * representation of all their parent categories (as well as parents of
- * parents etc.). From this representation, it determines which of these
- * articles are in one or all of a given subset of categories.
- *
- * Example use :
- * <code>
- * # Determines whether the article with the page_id 12345 is in both
- * # "Category 1" and "Category 2" or their subcategories, respectively
- *
- * $cf = new Categoryfinder ;
- * $cf->seed (
- * array ( 12345 ) ,
- * array ( "Category 1","Category 2" ) ,
- * "AND"
- * ) ;
- * $a = $cf->run() ;
- * print implode ( "," , $a ) ;
- * </code>
- *
- */
- class Categoryfinder {
- var $articles = array () ; # The original article IDs passed to the seed function
- var $deadend = array () ; # Array of DBKEY category names for categories that don't have a page
- var $parents = array () ; # Array of [ID => array()]
- var $next = array () ; # Array of article/category IDs
- var $targets = array () ; # Array of DBKEY category names
- var $name2id = array () ;
- var $mode ; # "AND" or "OR"
- var $dbr ; # Read-DB slave
- /**
- * Constructor (currently empty).
- */
- function __construct() {
- }
- /**
- * Initializes the instance. Do this prior to calling run().
- * @param $article_ids Array of article IDs
- * @param $categories FIXME
- * @param $mode String: FIXME, default 'AND'.
- */
- function seed ( $article_ids , $categories , $mode = "AND" ) {
- $this->articles = $article_ids ;
- $this->next = $article_ids ;
- $this->mode = $mode ;
- # Set the list of target categories; convert them to DBKEY form first
- $this->targets = array () ;
- foreach ( $categories AS $c ) {
- $ct = Title::makeTitleSafe( NS_CATEGORY, $c );
- if( $ct ) {
- $c = $ct->getDBkey();
- $this->targets[$c] = $c;
- }
- }
- }
- /**
- * Iterates through the parent tree starting with the seed values,
- * then checks the articles if they match the conditions
- * @return array of page_ids (those given to seed() that match the conditions)
- */
- function run () {
- $this->dbr = wfGetDB( DB_SLAVE );
- while ( count ( $this->next ) > 0 ) {
- $this->scan_next_layer () ;
- }
- # Now check if this applies to the individual articles
- $ret = array () ;
- foreach ( $this->articles AS $article ) {
- $conds = $this->targets ;
- if ( $this->check ( $article , $conds ) ) {
- # Matches the conditions
- $ret[] = $article ;
- }
- }
- return $ret ;
- }
- /**
- * This functions recurses through the parent representation, trying to match the conditions
- * @param $id The article/category to check
- * @param $conds The array of categories to match
- * @param $path used to check for recursion loops
- * @return bool Does this match the conditions?
- */
- function check ( $id , &$conds, $path=array() ) {
- // Check for loops and stop!
- if( in_array( $id, $path ) )
- return false;
- $path[] = $id;
- # Shortcut (runtime paranoia): No contitions=all matched
- if ( count ( $conds ) == 0 ) return true ;
- if ( !isset ( $this->parents[$id] ) ) return false ;
- # iterate through the parents
- foreach ( $this->parents[$id] AS $p ) {
- $pname = $p->cl_to ;
- # Is this a condition?
- if ( isset ( $conds[$pname] ) ) {
- # This key is in the category list!
- if ( $this->mode == "OR" ) {
- # One found, that's enough!
- $conds = array () ;
- return true ;
- } else {
- # Assuming "AND" as default
- unset ( $conds[$pname] ) ;
- if ( count ( $conds ) == 0 ) {
- # All conditions met, done
- return true ;
- }
- }
- }
- # Not done yet, try sub-parents
- if ( !isset ( $this->name2id[$pname] ) ) {
- # No sub-parent
- continue ;
- }
- $done = $this->check ( $this->name2id[$pname] , $conds, $path );
- if ( $done OR count ( $conds ) == 0 ) {
- # Subparents have done it!
- return true ;
- }
- }
- return false ;
- }
- /**
- * Scans a "parent layer" of the articles/categories in $this->next
- */
- function scan_next_layer () {
- $fname = "Categoryfinder::scan_next_layer" ;
- # Find all parents of the article currently in $this->next
- $layer = array () ;
- $res = $this->dbr->select(
- /* FROM */ 'categorylinks',
- /* SELECT */ '*',
- /* WHERE */ array( 'cl_from' => $this->next ),
- $fname."-1"
- );
- while ( $o = $this->dbr->fetchObject( $res ) ) {
- $k = $o->cl_to ;
- # Update parent tree
- if ( !isset ( $this->parents[$o->cl_from] ) ) {
- $this->parents[$o->cl_from] = array () ;
- }
- $this->parents[$o->cl_from][$k] = $o ;
- # Ignore those we already have
- if ( in_array ( $k , $this->deadend ) ) continue ;
- if ( isset ( $this->name2id[$k] ) ) continue ;
- # Hey, new category!
- $layer[$k] = $k ;
- }
- $this->dbr->freeResult( $res ) ;
- $this->next = array() ;
- # Find the IDs of all category pages in $layer, if they exist
- if ( count ( $layer ) > 0 ) {
- $res = $this->dbr->select(
- /* FROM */ 'page',
- /* SELECT */ 'page_id,page_title',
- /* WHERE */ array( 'page_namespace' => NS_CATEGORY , 'page_title' => $layer ),
- $fname."-2"
- );
- while ( $o = $this->dbr->fetchObject( $res ) ) {
- $id = $o->page_id ;
- $name = $o->page_title ;
- $this->name2id[$name] = $id ;
- $this->next[] = $id ;
- unset ( $layer[$name] ) ;
- }
- $this->dbr->freeResult( $res ) ;
- }
- # Mark dead ends
- foreach ( $layer AS $v ) {
- $this->deadend[$v] = $v ;
- }
- }
- } # END OF CLASS "Categoryfinder"
|