BatchRowIterator.php 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. <?php
  2. use Wikimedia\Rdbms\IDatabase;
  3. /**
  4. * Allows iterating a large number of rows in batches transparently.
  5. * By default when iterated over returns the full query result as an
  6. * array of rows. Can be wrapped in RecursiveIteratorIterator to
  7. * collapse those arrays into a single stream of rows queried in batches.
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22. * http://www.gnu.org/copyleft/gpl.html
  23. *
  24. * @file
  25. * @ingroup Maintenance
  26. */
  27. class BatchRowIterator implements RecursiveIterator {
  28. /**
  29. * @var IDatabase $db The database to read from
  30. */
  31. protected $db;
  32. /**
  33. * @var string|array $table The name or names of the table to read from
  34. */
  35. protected $table;
  36. /**
  37. * @var array $primaryKey The name of the primary key(s)
  38. */
  39. protected $primaryKey;
  40. /**
  41. * @var int $batchSize The number of rows to fetch per iteration
  42. */
  43. protected $batchSize;
  44. /**
  45. * @var array $conditions Array of strings containing SQL conditions
  46. * to add to the query
  47. */
  48. protected $conditions = [];
  49. /**
  50. * @var array $joinConditions
  51. */
  52. protected $joinConditions = [];
  53. /**
  54. * @var array $fetchColumns List of column names to select from the
  55. * table suitable for use with IDatabase::select()
  56. */
  57. protected $fetchColumns;
  58. /**
  59. * @var string $orderBy SQL Order by condition generated from $this->primaryKey
  60. */
  61. protected $orderBy;
  62. /**
  63. * @var array $current The current iterator value
  64. */
  65. private $current = [];
  66. /**
  67. * @var int key 0-indexed number of pages fetched since self::reset()
  68. */
  69. private $key;
  70. /**
  71. * @var array Additional query options
  72. */
  73. protected $options = [];
  74. /**
  75. * @param IDatabase $db The database to read from
  76. * @param string|array $table The name or names of the table to read from
  77. * @param string|array $primaryKey The name or names of the primary key columns
  78. * @param int $batchSize The number of rows to fetch per iteration
  79. * @throws InvalidArgumentException
  80. */
  81. public function __construct( IDatabase $db, $table, $primaryKey, $batchSize ) {
  82. if ( $batchSize < 1 ) {
  83. throw new InvalidArgumentException( 'Batch size must be at least 1 row.' );
  84. }
  85. $this->db = $db;
  86. $this->table = $table;
  87. $this->primaryKey = (array)$primaryKey;
  88. $this->fetchColumns = $this->primaryKey;
  89. $this->orderBy = implode( ' ASC,', $this->primaryKey ) . ' ASC';
  90. $this->batchSize = $batchSize;
  91. }
  92. /**
  93. * @param array $conditions Query conditions suitable for use with
  94. * IDatabase::select
  95. */
  96. public function addConditions( array $conditions ) {
  97. $this->conditions = array_merge( $this->conditions, $conditions );
  98. }
  99. /**
  100. * @param array $options Query options suitable for use with
  101. * IDatabase::select
  102. */
  103. public function addOptions( array $options ) {
  104. $this->options = array_merge( $this->options, $options );
  105. }
  106. /**
  107. * @param array $conditions Query join conditions suitable for use
  108. * with IDatabase::select
  109. */
  110. public function addJoinConditions( array $conditions ) {
  111. $this->joinConditions = array_merge( $this->joinConditions, $conditions );
  112. }
  113. /**
  114. * @param array $columns List of column names to select from the
  115. * table suitable for use with IDatabase::select()
  116. */
  117. public function setFetchColumns( array $columns ) {
  118. // If it's not the all column selector merge in the primary keys we need
  119. if ( count( $columns ) === 1 && reset( $columns ) === '*' ) {
  120. $this->fetchColumns = $columns;
  121. } else {
  122. $this->fetchColumns = array_unique( array_merge(
  123. $this->primaryKey,
  124. $columns
  125. ) );
  126. }
  127. }
  128. /**
  129. * Extracts the primary key(s) from a database row.
  130. *
  131. * @param stdClass $row An individual database row from this iterator
  132. * @return array Map of primary key column to value within the row
  133. */
  134. public function extractPrimaryKeys( $row ) {
  135. $pk = [];
  136. foreach ( $this->primaryKey as $alias => $column ) {
  137. $name = is_numeric( $alias ) ? $column : $alias;
  138. $pk[$name] = $row->{$name};
  139. }
  140. return $pk;
  141. }
  142. /**
  143. * @return array The most recently fetched set of rows from the database
  144. */
  145. public function current() {
  146. return $this->current;
  147. }
  148. /**
  149. * @return int 0-indexed count of the page number fetched
  150. */
  151. public function key() {
  152. return $this->key;
  153. }
  154. /**
  155. * Reset the iterator to the begining of the table.
  156. */
  157. public function rewind() {
  158. $this->key = -1; // self::next() will turn this into 0
  159. $this->current = [];
  160. $this->next();
  161. }
  162. /**
  163. * @return bool True when the iterator is in a valid state
  164. */
  165. public function valid() {
  166. return (bool)$this->current;
  167. }
  168. /**
  169. * @return bool True when this result set has rows
  170. */
  171. public function hasChildren() {
  172. return $this->current && count( $this->current );
  173. }
  174. /**
  175. * @return RecursiveIterator
  176. */
  177. public function getChildren() {
  178. return new NotRecursiveIterator( new ArrayIterator( $this->current ) );
  179. }
  180. /**
  181. * Fetch the next set of rows from the database.
  182. */
  183. public function next() {
  184. $res = $this->db->select(
  185. $this->table,
  186. $this->fetchColumns,
  187. $this->buildConditions(),
  188. __METHOD__,
  189. [
  190. 'LIMIT' => $this->batchSize,
  191. 'ORDER BY' => $this->orderBy,
  192. ] + $this->options,
  193. $this->joinConditions
  194. );
  195. // The iterator is converted to an array because in addition to
  196. // returning it in self::current() we need to use the end value
  197. // in self::buildConditions()
  198. $this->current = iterator_to_array( $res );
  199. $this->key++;
  200. }
  201. /**
  202. * Uses the primary key list and the maximal result row from the
  203. * previous iteration to build an SQL condition sufficient for
  204. * selecting the next page of results. All except the final key use
  205. * `=` conditions while the final key uses a `>` condition
  206. *
  207. * Example output:
  208. * [ '( foo = 42 AND bar > 7 ) OR ( foo > 42 )' ]
  209. *
  210. * @return array The SQL conditions necessary to select the next set
  211. * of rows in the batched query
  212. */
  213. protected function buildConditions() {
  214. if ( !$this->current ) {
  215. return $this->conditions;
  216. }
  217. $maxRow = end( $this->current );
  218. $maximumValues = [];
  219. foreach ( $this->primaryKey as $alias => $column ) {
  220. $name = is_numeric( $alias ) ? $column : $alias;
  221. $maximumValues[$column] = $this->db->addQuotes( $maxRow->{$name} );
  222. }
  223. $pkConditions = [];
  224. // For example: If we have 3 primary keys
  225. // first run through will generate
  226. // col1 = 4 AND col2 = 7 AND col3 > 1
  227. // second run through will generate
  228. // col1 = 4 AND col2 > 7
  229. // and the final run through will generate
  230. // col1 > 4
  231. while ( $maximumValues ) {
  232. $pkConditions[] = $this->buildGreaterThanCondition( $maximumValues );
  233. array_pop( $maximumValues );
  234. }
  235. $conditions = $this->conditions;
  236. $conditions[] = sprintf( '( %s )', implode( ' ) OR ( ', $pkConditions ) );
  237. return $conditions;
  238. }
  239. /**
  240. * Given an array of column names and their maximum value generate
  241. * an SQL condition where all keys except the last match $quotedMaximumValues
  242. * exactly and the last column is greater than the matching value in
  243. * $quotedMaximumValues
  244. *
  245. * @param array $quotedMaximumValues The maximum values quoted with
  246. * $this->db->addQuotes()
  247. * @return string An SQL condition that will select rows where all
  248. * columns match the maximum value exactly except the last column
  249. * which must be greater than the provided maximum value
  250. */
  251. protected function buildGreaterThanCondition( array $quotedMaximumValues ) {
  252. $keys = array_keys( $quotedMaximumValues );
  253. $lastColumn = end( $keys );
  254. $lastValue = array_pop( $quotedMaximumValues );
  255. $conditions = [];
  256. foreach ( $quotedMaximumValues as $column => $value ) {
  257. $conditions[] = "$column = $value";
  258. }
  259. $conditions[] = "$lastColumn > $lastValue";
  260. return implode( ' AND ', $conditions );
  261. }
  262. }