Export.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. <?php
  2. # Copyright (C) 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
  3. # http://www.mediawiki.org/
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 2 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License along
  16. # with this program; if not, write to the Free Software Foundation, Inc.,
  17. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. # http://www.gnu.org/copyleft/gpl.html
  19. /**
  20. * @defgroup Dump Dump
  21. */
  22. /**
  23. * @ingroup SpecialPage Dump
  24. */
  class WikiExporter {
      # Return distinct author list (when not returning full history)
      var $list_authors = false;
      # XML <contributors> fragment built by do_list_authors()
      var $author_list = "";
      # When true, outputPageStream() also asks the writer for upload records
      var $dumpUploads = false;

      # Assigned by the constructor; declared here so that they are real
      # properties rather than being created dynamically on first write.
      var $db;
      var $history;
      var $buffer;
      var $writer;
      var $sink;
      var $text;

      const FULL = 1;
      const CURRENT = 2;
      const STABLE = 4; // extension defined
      const LOGS = 8;

      const BUFFER = 0;
      const STREAM = 1;

      const TEXT = 0;
      const STUB = 1;

      /**
       * If using WikiExporter::STREAM to stream a large amount of data,
       * provide a database connection which is not managed by
       * LoadBalancer to read from: some history blob types will
       * make additional queries to pull source data while the
       * main query is still running.
       *
       * @param $db Database
       * @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT,
       *                 or an associative array:
       *                   offset: non-inclusive offset at which to start the query
       *                   limit: maximum number of rows to return
       *                   dir: "asc" or "desc" timestamp order
       * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM
       * @param $text Int: one of WikiExporter::TEXT or WikiExporter::STUB
       */
      function __construct( &$db, $history = WikiExporter::CURRENT,
              $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
          $this->db =& $db;
          $this->history = $history;
          $this->buffer = $buffer;
          $this->writer = new XmlDumpWriter();
          $this->sink = new DumpOutput();
          $this->text = $text;
      }

      /**
       * Set the DumpOutput or DumpFilter object which will receive
       * various row objects and XML output for filtering. Filters
       * can be chained or used as callbacks.
       *
       * @param $sink mixed
       */
      public function setOutputSink( &$sink ) {
          $this->sink =& $sink;
      }

      /**
       * Asks the writer for the stream opening and passes it to the sink.
       */
      public function openStream() {
          $output = $this->writer->openStream();
          $this->sink->writeOpenStream( $output );
      }

      /**
       * Asks the writer for the stream closing and passes it to the sink.
       */
      public function closeStream() {
          $output = $this->writer->closeStream();
          $this->sink->writeCloseStream( $output );
      }

      /**
       * Dumps a series of page and revision records for all pages
       * in the database, either including complete history or only
       * the most recent version.
       */
      public function allPages() {
          return $this->dumpFrom( '' );
      }

      /**
       * Dumps a series of page and revision records for those pages
       * in the database falling within the page_id range given.
       * @param $start Int: inclusive lower limit (this id is included)
       * @param $end Int: Exclusive upper limit (this id is not included)
       *             If 0, no upper limit.
       */
      public function pagesByRange( $start, $end ) {
          $condition = 'page_id >= ' . intval( $start );
          if( $end ) {
              $condition .= ' AND page_id < ' . intval( $end );
          }
          return $this->dumpFrom( $condition );
      }

      /**
       * Dumps the page identified by the given title.
       * @param $title Title
       */
      public function pageByTitle( $title ) {
          return $this->dumpFrom(
              'page_namespace=' . $title->getNamespace() .
              ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
      }

      /**
       * Dumps the page with the given name.
       * @param $name String: page name, parsed with Title::newFromText()
       * @return WikiError if the name does not yield a title; otherwise
       *         whatever pageByTitle() returns
       */
      public function pageByName( $name ) {
          $title = Title::newFromText( $name );
          if( is_null( $title ) ) {
              return new WikiError( "Can't export invalid title" );
          } else {
              return $this->pageByTitle( $title );
          }
      }

      /**
       * Dumps each named page in turn. The per-page return values
       * (including any WikiError) are discarded.
       * @param $names Array of page names
       */
      public function pagesByName( $names ) {
          foreach( $names as $name ) {
              $this->pageByName( $name );
          }
      }

      /**
       * Runs dumpFrom() with no extra condition.
       */
      public function allLogs() {
          return $this->dumpFrom( '' );
      }

      /**
       * Runs dumpFrom() restricted to a log_id range.
       * @param $start Int: inclusive lower limit
       * @param $end Int: exclusive upper limit; if 0, no upper limit
       */
      public function logsByRange( $start, $end ) {
          $condition = 'log_id >= ' . intval( $start );
          if( $end ) {
              $condition .= ' AND log_id < ' . intval( $end );
          }
          return $this->dumpFrom( $condition );
      }

      /**
       * Generates the distinct list of authors of an article and stores it,
       * as an XML fragment, in $this->author_list.
       * Not called by default (depends on $this->list_authors).
       * Can be set by Special:Export when not exporting whole history.
       *
       * @param $page String: page table name as it should appear in SQL
       * @param $revision String: revision table name as it should appear in SQL
       * @param $cond String: SQL condition appended to the WHERE clause
       */
      protected function do_list_authors( $page , $revision , $cond ) {
          $fname = "do_list_authors" ;
          wfProfileIn( $fname );
          $this->author_list = "<contributors>";
          // Leave out revisions whose user name has been hidden (rev_deleted)
          $nothidden = '(rev_deleted & '.Revision::DELETED_USER.') = 0';
          $sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} " .
              "WHERE page_id=rev_page AND $nothidden AND " . $cond ;
          $result = $this->db->query( $sql, $fname );
          $resultset = $this->db->resultObject( $result );
          while( $row = $resultset->fetchObject() ) {
              // htmlspecialchars() rather than htmlentities(): the fragment is
              // embedded in XML, which does not define HTML's named entities.
              $this->author_list .= "<contributor>" .
                  "<username>" .
                  htmlspecialchars( $row->rev_user_text ) .
                  "</username>" .
                  "<id>" .
                  $row->rev_user .
                  "</id>" .
                  "</contributor>";
          }
          wfProfileOut( $fname );
          $this->author_list .= "</contributors>";
      }

      /**
       * Builds and runs the dump query for the configured history mode and
       * streams the rows to the sink.
       *
       * @param $cond String: extra SQL condition, or '' for none
       * @return WikiError when the history specification is not recognised,
       *         or when a STABLE dump is requested and the
       *         WikiExporter::dumpStableQuery hook run returns true;
       *         otherwise nothing
       */
      protected function dumpFrom( $cond = '' ) {
          wfProfileIn( __METHOD__ );
          // The array form must be recognised before any bitwise test: an
          // array operand is coerced to an integer by older PHP versions
          // (making it look like a flag value) and rejected by PHP 8.
          $historyIsArray = is_array( $this->history );

          if( !$historyIsArray && ( $this->history & self::LOGS ) ) {
              # For logging dumps...
              if( $this->buffer == WikiExporter::STREAM ) {
                  $prev = $this->db->bufferResults( false );
              }
              $where = array( 'user_id = log_user' );
              # Hide private logs
              $hideLogs = LogEventsList::getExcludeClause( $this->db );
              if( $hideLogs ) {
                  $where[] = $hideLogs;
              }
              # Add on any caller specified conditions
              if( $cond ) {
                  $where[] = $cond;
              }
              # Get logging table name for logging.* clause
              $logging = $this->db->tableName('logging');
              $result = $this->db->select( array('logging','user'),
                  array( "{$logging}.*", 'user_name' ), // grab the user name
                  $where,
                  __METHOD__,
                  array( 'ORDER BY' => 'log_id', 'USE INDEX' => array('logging' => 'PRIMARY') )
              );
              $wrapper = $this->db->resultObject( $result );
              if( $this->buffer == WikiExporter::STREAM ) {
                  $this->db->bufferResults( $prev );
              }
              $this->outputLogStream( $wrapper );
          } else {
              # For page dumps...
              $tables = array( 'page', 'revision' );
              $opts = array( 'ORDER BY' => 'page_id ASC' );
              $opts['USE INDEX'] = array();
              $join = array();

              if( $historyIsArray ) {
                  # Time offset/limit for all pages/history...
                  $revJoin = 'page_id=rev_page';
                  # Set time order
                  if( isset( $this->history['dir'] ) && $this->history['dir'] == 'asc' ) {
                      $op = '>';
                      $opts['ORDER BY'] = 'rev_timestamp ASC';
                  } else {
                      $op = '<';
                      $opts['ORDER BY'] = 'rev_timestamp DESC';
                  }
                  # Set offset
                  if( !empty( $this->history['offset'] ) ) {
                      $revJoin .= " AND rev_timestamp $op " .
                          $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
                  }
                  $join['revision'] = array('INNER JOIN',$revJoin);
                  # Set query limit
                  if( !empty( $this->history['limit'] ) ) {
                      $opts['LIMIT'] = intval( $this->history['limit'] );
                  }
              } elseif( $this->history & WikiExporter::FULL ) {
                  # Full history dumps...
                  $join['revision'] = array('INNER JOIN','page_id=rev_page');
              } elseif( $this->history & WikiExporter::CURRENT ) {
                  # Latest revision dumps...
                  if( $this->list_authors && $cond != '' ) { // List authors, if so desired
                      list($page,$revision) = $this->db->tableNamesN('page','revision');
                      $this->do_list_authors( $page, $revision, $cond );
                  }
                  $join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
              } elseif( $this->history & WikiExporter::STABLE ) {
                  # "Stable" revision dumps...
                  # Default JOIN, to be overridden...
                  $join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
                  # One, and only one hook should set this, and return false
                  if( wfRunHooks( 'WikiExporter::dumpStableQuery', array(&$tables,&$opts,&$join) ) ) {
                      wfProfileOut( __METHOD__ );
                      return new WikiError( __METHOD__." given invalid history dump type." );
                  }
              } else {
                  # Unknown history specification parameter?
                  wfProfileOut( __METHOD__ );
                  return new WikiError( __METHOD__." given invalid history dump type." );
              }

              # Query optimization hacks
              if( $cond == '' ) {
                  $opts[] = 'STRAIGHT_JOIN';
                  $opts['USE INDEX']['page'] = 'PRIMARY';
              }
              # Build text join options
              if( $this->text != WikiExporter::STUB ) { // 1-pass
                  $tables[] = 'text';
                  $join['text'] = array('INNER JOIN','rev_text_id=old_id');
              }
              if( $this->buffer == WikiExporter::STREAM ) {
                  $prev = $this->db->bufferResults( false );
              }
              # Do the query!
              $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
              $wrapper = $this->db->resultObject( $result );
              # Output dump results. This is done exactly once:
              # outputPageStream() frees the result set and already appends
              # $this->author_list to the last page it closes.
              $this->outputPageStream( $wrapper );
              if( $this->buffer == WikiExporter::STREAM ) {
                  $this->db->bufferResults( $prev );
              }
          }
          wfProfileOut( __METHOD__ );
      }

      /**
       * Runs through a query result set dumping page and revision records.
       * The result set should be sorted/grouped by page to avoid duplicate
       * page records in the output.
       *
       * The result set will be freed once complete. Should be safe for
       * streaming (non-buffered) queries, as long as it was made on a
       * separate database connection not managed by LoadBalancer; some
       * blob storage types will make queries to pull source data.
       *
       * @param $resultset ResultWrapper
       */
      protected function outputPageStream( $resultset ) {
          $last = null;
          while( $row = $resultset->fetchObject() ) {
              if( is_null( $last ) ||
                  $last->page_namespace != $row->page_namespace ||
                  $last->page_title != $row->page_title ) {
                  // A new page begins: close the previous one first, if any
                  if( isset( $last ) ) {
                      $output = '';
                      if( $this->dumpUploads ) {
                          $output .= $this->writer->writeUploads( $last );
                      }
                      $output .= $this->writer->closePage();
                      $this->sink->writeClosePage( $output );
                  }
                  $output = $this->writer->openPage( $row );
                  $this->sink->writeOpenPage( $row, $output );
                  $last = $row;
              }
              $output = $this->writer->writeRevision( $row );
              $this->sink->writeRevision( $row, $output );
          }
          if( isset( $last ) ) {
              $output = '';
              if( $this->dumpUploads ) {
                  $output .= $this->writer->writeUploads( $last );
              }
              // The author list is only emitted with the final page
              $output .= $this->author_list;
              $output .= $this->writer->closePage();
              $this->sink->writeClosePage( $output );
          }
          $resultset->free();
      }

      /**
       * Runs through a query result set dumping one log item per row,
       * then frees the result set.
       *
       * @param $resultset ResultWrapper
       */
      protected function outputLogStream( $resultset ) {
          while( $row = $resultset->fetchObject() ) {
              $output = $this->writer->writeLogItem( $row );
              $this->sink->writeLogItem( $row, $output );
          }
          $resultset->free();
      }
  }
  314. /**
  315. * @ingroup Dump
  316. */
  class XmlDumpWriter {
      /**
       * Reports which export schema version this writer produces.
       * @return string
       */
      function schemaVersion() {
          // FIXME: upgrade to 0.4 when updated XSD is ready, for the revision deletion bits
          return "0.3";
      }

      /**
       * Produces the opening root <mediawiki> tag followed by the
       * <siteinfo> section. No xml directive is emitted, so the result
       * can be embedded as a subelement of a larger XML stream.
       * Namespace and XML Schema references are included.
       *
       * Output will be encoded in UTF-8.
       *
       * @return string
       */
      function openStream() {
          global $wgContLanguageCode;
          $version = $this->schemaVersion();
          $exportNs = "http://www.mediawiki.org/xml/export-$version/";
          $rootAttribs = array(
              'xmlns' => $exportNs,
              'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
              'xsi:schemaLocation' => "$exportNs http://www.mediawiki.org/xml/export-$version.xsd",
              'version' => $version,
              'xml:lang' => $wgContLanguageCode,
          );
          $root = Xml::element( 'mediawiki', $rootAttribs, null );
          return $root . "\n" . $this->siteInfo();
      }

      /**
       * Assembles the <siteinfo> section from the individual pieces.
       * @return string
       */
      function siteInfo() {
          $parts = array();
          $parts[] = $this->sitename();
          $parts[] = $this->homelink();
          $parts[] = $this->generator();
          $parts[] = $this->caseSetting();
          $parts[] = $this->namespaces();
          $body = implode( "\n ", $parts );
          return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
      }

      /**
       * @return string <sitename> element holding $wgSitename
       */
      function sitename() {
          global $wgSitename;
          return Xml::element( 'sitename', array(), $wgSitename );
      }

      /**
       * @return string <generator> element naming the MediaWiki version
       */
      function generator() {
          global $wgVersion;
          return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
      }

      /**
       * @return string <base> element holding the main page's full URL
       */
      function homelink() {
          $mainPage = Title::newMainPage();
          return Xml::element( 'base', array(), $mainPage->getFullUrl() );
      }

      /**
       * @return string <case> element derived from $wgCapitalLinks
       */
      function caseSetting() {
          global $wgCapitalLinks;
          // "case-insensitive" option is reserved for future
          if( $wgCapitalLinks ) {
              $sensitivity = 'first-letter';
          } else {
              $sensitivity = 'case-sensitive';
          }
          return Xml::element( 'case', array(), $sensitivity );
      }

      /**
       * @return string <namespaces> element with one <namespace> child per
       *         entry of the content language's formatted namespace list
       */
      function namespaces() {
          global $wgContLang;
          $xml = "<namespaces>\n";
          foreach( $wgContLang->getFormattedNamespaces() as $index => $label ) {
              $entry = Xml::element( 'namespace', array( 'key' => $index ), $label );
              $xml .= ' ' . $entry . "\n";
          }
          return $xml . " </namespaces>";
      }

      /**
       * Produces the closing root tag.
       * Call when finished dumping things.
       * @return string
       */
      function closeStream() {
          return "</mediawiki>\n";
      }

      /**
       * Produces the start of a <page> section (title, id and, when
       * non-empty, restrictions) from the given database row.
       *
       * @param $row object
       * @return string
       * @access private
       */
      function openPage( $row ) {
          $title = Title::makeTitle( $row->page_namespace, $row->page_title );
          $xml = " <page>\n";
          $xml .= ' ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
          $xml .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
          if( $row->page_restrictions != '' ) {
              $restrictions = strval( $row->page_restrictions );
              $xml .= ' ' . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
          }
          return $xml;
      }

      /**
       * Produces the end of a <page> section.
       *
       * @return string
       * @access private
       */
      function closePage() {
          return " </page>\n";
      }

      /**
       * Produces a <revision> section filled in from the given
       * database row.
       *
       * @param $row object
       * @return string
       * @access private
       */
      function writeRevision( $row ) {
          $profileKey = 'WikiExporter::dumpRev';
          wfProfileIn( $profileKey );

          $xml = " <revision>\n";
          $xml .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
          $xml .= $this->writeTimestamp( $row->rev_timestamp );

          // Contributor, or a placeholder when the user is hidden
          $xml .= ( $row->rev_deleted & Revision::DELETED_USER )
              ? " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n"
              : $this->writeContributor( $row->rev_user, $row->rev_user_text );

          if( $row->rev_minor_edit ) {
              $xml .= " <minor/>\n";
          }

          // Comment, or a placeholder when it is hidden; omitted when empty
          if( $row->rev_deleted & Revision::DELETED_COMMENT ) {
              $xml .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
          } elseif( $row->rev_comment != '' ) {
              $xml .= " " . Xml::elementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
          }

          if( $row->rev_deleted & Revision::DELETED_TEXT ) {
              $textElement = Xml::element( 'text', array( 'deleted' => 'deleted' ) );
          } elseif( isset( $row->old_text ) ) {
              // Raw text from the database may have invalid chars
              $body = strval( Revision::getRevisionText( $row ) );
              $textElement = Xml::elementClean( 'text', array( 'xml:space' => 'preserve' ), $body );
          } else {
              // Stub output
              $textElement = Xml::element( 'text', array( 'id' => $row->rev_text_id ), "" );
          }
          $xml .= " " . $textElement . "\n";

          $xml .= " </revision>\n";
          wfProfileOut( $profileKey );
          return $xml;
      }

      /**
       * Produces a <logitem> section filled in from the given
       * database row.
       *
       * @param $row object
       * @return string
       * @access private
       */
      function writeLogItem( $row ) {
          $profileKey = 'WikiExporter::writeLogItem';
          wfProfileIn( $profileKey );

          $xml = " <logitem>\n";
          $xml .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
          $xml .= $this->writeTimestamp( $row->log_timestamp );

          $xml .= ( $row->log_deleted & LogPage::DELETED_USER )
              ? " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n"
              : $this->writeContributor( $row->log_user, $row->user_name );

          if( $row->log_deleted & LogPage::DELETED_COMMENT ) {
              $xml .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
          } elseif( $row->log_comment != '' ) {
              $xml .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
          }

          $xml .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
          $xml .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

          if( $row->log_deleted & LogPage::DELETED_ACTION ) {
              $xml .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
          } else {
              $target = Title::makeTitle( $row->log_namespace, $row->log_title );
              $xml .= " " . Xml::elementClean( 'logtitle', null, $target->getPrefixedText() ) . "\n";
              $params = strval( $row->log_params );
              $xml .= " " . Xml::elementClean( 'params', array( 'xml:space' => 'preserve' ), $params ) . "\n";
          }

          $xml .= " </logitem>\n";
          wfProfileOut( $profileKey );
          return $xml;
      }

      /**
       * @param $timestamp mixed timestamp accepted by wfTimestamp()
       * @return string <timestamp> line in ISO 8601 form
       */
      function writeTimestamp( $timestamp ) {
          $iso = wfTimestamp( TS_ISO_8601, $timestamp );
          $element = Xml::element( 'timestamp', null, $iso );
          return " " . $element . "\n";
      }

      /**
       * Produces a <contributor> section: username and id when $id is
       * truthy, otherwise a single <ip> element holding $text.
       *
       * @param $id mixed user id
       * @param $text string user name or IP text
       * @return string
       */
      function writeContributor( $id, $text ) {
          $xml = " <contributor>\n";
          if( !$id ) {
              $xml .= " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
          } else {
              $xml .= " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
              $xml .= " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
          }
          return $xml . " </contributor>\n";
      }

      /**
       * Warning! This data is potentially inconsistent. :(
       *
       * Produces <upload> sections for a page in the NS_IMAGE namespace
       * whose file is found: the file's history in reverse of the order
       * getHistory() returns it, then the file itself. Returns '' otherwise.
       *
       * @param $row object
       * @return string
       */
      function writeUploads( $row ) {
          if( $row->page_namespace != NS_IMAGE ) {
              return '';
          }
          $current = wfFindFile( $row->page_title );
          if( !$current ) {
              return '';
          }
          $xml = '';
          foreach( array_reverse( $current->getHistory() ) as $oldVersion ) {
              $xml .= $this->writeUpload( $oldVersion );
          }
          return $xml . $this->writeUpload( $current );
      }

      /**
       * Produces one <upload> section for the given file object.
       *
       * @param $file object providing getTimestamp(), getUser(),
       *              getDescription(), getName(), getFullUrl() and getSize()
       * @return string
       */
      function writeUpload( $file ) {
          $xml = " <upload>\n";
          $xml .= $this->writeTimestamp( $file->getTimestamp() );
          $xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
          $xml .= " " . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n";
          $xml .= " " . Xml::element( 'filename', null, $file->getName() ) . "\n";
          $xml .= " " . Xml::element( 'src', null, $file->getFullUrl() ) . "\n";
          $xml .= " " . Xml::element( 'size', null, $file->getSize() ) . "\n";
          $xml .= " </upload>\n";
          return $xml;
      }
  }
  547. /**
  548. * Base class for output stream; prints to stdout or buffer or wherever.
  549. * @ingroup Dump
  550. */
  class DumpOutput {
      /**
       * Receives the stream opening and writes it out.
       * @param $string string
       */
      function writeOpenStream( $string ) {
          $this->write( $string );
      }

      /**
       * Receives the stream closing and writes it out.
       * @param $string string
       */
      function writeCloseStream( $string ) {
          $this->write( $string );
      }

      /**
       * Receives a page opening and writes it out; $page is not used here.
       * @param $page mixed
       * @param $string string
       */
      function writeOpenPage( $page, $string ) {
          $this->write( $string );
      }

      /**
       * Receives a page closing and writes it out.
       * @param $string string
       */
      function writeClosePage( $string ) {
          $this->write( $string );
      }

      /**
       * Receives a revision and writes it out; $rev is not used here.
       * @param $rev mixed
       * @param $string string
       */
      function writeRevision( $rev, $string ) {
          $this->write( $string );
      }

      /**
       * Receives a log item and writes it out; $rev is not used here.
       * @param $rev mixed
       * @param $string string
       */
      function writeLogItem( $rev, $string ) {
          $this->write( $string );
      }

      /**
       * Sends the string to standard output.
       * Override to write to a different stream type.
       * @param $string string
       */
      function write( $string ) {
          echo $string;
      }
  }
  578. /**
  579. * Stream outputter to send data to a file.
  580. * @ingroup Dump
  581. */
  class DumpFileOutput extends DumpOutput {
      # Stream handle that write() sends output to
      var $handle;

      /**
       * Opens the given file for writing.
       * @param $file string path passed to fopen() with mode "wt"
       */
      function __construct( $file ) {
          $this->handle = fopen( $file, "wt" );
      }

      /**
       * Legacy constructor name, kept so that existing
       * parent::DumpFileOutput() calls keep working. PHP 8 no longer
       * treats a method named after its class as the constructor.
       * @param $file string
       */
      function DumpFileOutput( $file ) {
          // self:: rather than $this-> so a subclass constructor is not run
          self::__construct( $file );
      }

      /**
       * Writes the string to the handle.
       * @param $string string
       */
      function write( $string ) {
          fputs( $this->handle, $string );
      }
  }
  591. /**
  592. * Stream outputter to send data to a file via some filter program.
  593. * Even if compression is available in a library, using a separate
  594. * program can allow us to make use of a multi-processor system.
  595. * @ingroup Dump
  596. */
  class DumpPipeOutput extends DumpFileOutput {
      /**
       * Starts the command with popen() and keeps its write handle.
       *
       * @param $command string shell command to pipe the dump into
       * @param $file string|null when given, the command's standard output
       *              is redirected to this (shell-escaped) path
       */
      function __construct( $command, $file = null ) {
          if( !is_null( $file ) ) {
              $command .= " > " . wfEscapeShellArg( $file );
          }
          $this->handle = popen( $command, "w" );
      }

      /**
       * Legacy constructor name, kept so that existing
       * parent::DumpPipeOutput() calls keep working. PHP 8 no longer
       * treats a method named after its class as the constructor.
       *
       * @param $command string
       * @param $file string|null
       */
      function DumpPipeOutput( $command, $file = null ) {
          // self:: rather than $this-> so a subclass constructor is not run
          self::__construct( $command, $file );
      }
  }
  605. /**
  606. * Sends dump output via the gzip compressor.
  607. * @ingroup Dump
  608. */
  class DumpGZipOutput extends DumpPipeOutput {
      /**
       * Pipes the dump through "gzip" into the given file.
       * @param $file string
       */
      function __construct( $file ) {
          parent::DumpPipeOutput( "gzip", $file );
      }

      /**
       * Legacy constructor name, kept for backward compatibility. PHP 8 no
       * longer treats a method named after its class as the constructor.
       * @param $file string
       */
      function DumpGZipOutput( $file ) {
          self::__construct( $file );
      }
  }
  614. /**
  615. * Sends dump output via the bzip2 compressor.
  616. * @ingroup Dump
  617. */
  class DumpBZip2Output extends DumpPipeOutput {
      /**
       * Pipes the dump through "bzip2" into the given file.
       * @param $file string
       */
      function __construct( $file ) {
          parent::DumpPipeOutput( "bzip2", $file );
      }

      /**
       * Legacy constructor name, kept for backward compatibility. PHP 8 no
       * longer treats a method named after its class as the constructor.
       * @param $file string
       */
      function DumpBZip2Output( $file ) {
          self::__construct( $file );
      }
  }
  623. /**
  624. * Sends dump output via the p7zip compressor.
  625. * @ingroup Dump
  626. */
  class Dump7ZipOutput extends DumpPipeOutput {
      /**
       * Pipes the dump into "7za", which adds its standard input to the
       * given archive file.
       * @param $file string archive path (shell-escaped here)
       */
      function __construct( $file ) {
          $command = "7za a -bd -si " . wfEscapeShellArg( $file );
          // Suppress annoying useless crap from p7zip
          // Unfortunately this could suppress real error messages too
          $command .= ' >' . wfGetNull() . ' 2>&1';
          parent::DumpPipeOutput( $command );
      }

      /**
       * Legacy constructor name, kept for backward compatibility. PHP 8 no
       * longer treats a method named after its class as the constructor.
       * @param $file string
       */
      function Dump7ZipOutput( $file ) {
          self::__construct( $file );
      }
  }
  636. /**
  637. * Dump output filter class.
  638. * This just does output filtering and streaming; XML formatting is done
  639. * higher up, so be careful in what you do.
  640. * @ingroup Dump
  641. */
  class DumpFilter {
      # Downstream sink (or filter) that accepted output is forwarded to
      public $sink;
      # Whether the page currently being streamed passed pass()
      public $sendingThisPage = false;

      /**
       * @param $sink mixed object receiving the filtered output; held by reference
       */
      function __construct( &$sink ) {
          $this->sink =& $sink;
      }

      /**
       * Legacy constructor name, kept so that existing
       * parent::DumpFilter() calls keep working. PHP 8 no longer treats
       * a method named after its class as the constructor.
       * @param $sink mixed
       */
      function DumpFilter( &$sink ) {
          // self:: rather than $this-> so a subclass constructor is not run
          self::__construct( $sink );
      }

      /**
       * Forwards the stream opening unconditionally.
       * @param $string string
       */
      function writeOpenStream( $string ) {
          $this->sink->writeOpenStream( $string );
      }

      /**
       * Forwards the stream closing unconditionally.
       * @param $string string
       */
      function writeCloseStream( $string ) {
          $this->sink->writeCloseStream( $string );
      }

      /**
       * Decides via pass() whether this page is let through, remembers
       * the decision, and forwards the page opening if it is.
       * @param $page object
       * @param $string string
       */
      function writeOpenPage( $page, $string ) {
          $this->sendingThisPage = $this->pass( $page, $string );
          if( $this->sendingThisPage ) {
              $this->sink->writeOpenPage( $page, $string );
          }
      }

      /**
       * Forwards the page closing if the page was let through, then
       * clears the per-page decision.
       * @param $string string
       */
      function writeClosePage( $string ) {
          if( $this->sendingThisPage ) {
              $this->sink->writeClosePage( $string );
              $this->sendingThisPage = false;
          }
      }

      /**
       * Forwards the revision if its page was let through.
       * @param $rev object
       * @param $string string
       */
      function writeRevision( $rev, $string ) {
          if( $this->sendingThisPage ) {
              $this->sink->writeRevision( $rev, $string );
          }
      }

      /**
       * Forwards the log item unconditionally. It goes to the sink's
       * writeLogItem(), not writeRevision(), so downstream sinks see
       * log items as log items.
       * @param $rev object
       * @param $string string
       */
      function writeLogItem( $rev, $string ) {
          $this->sink->writeLogItem( $rev, $string );
      }

      /**
       * Override for page-based filter types.
       * @param $page object
       * @return bool
       */
      function pass( $page ) {
          return true;
      }
  }
  680. /**
  681. * Simple dump output filter to exclude all talk pages.
  682. * @ingroup Dump
  683. */
  class DumpNotalkFilter extends DumpFilter {
      /**
       * Lets a page through only when its namespace is not a talk namespace.
       * @param $page object
       * @return bool
       */
      function pass( $page ) {
          $isTalk = MWNamespace::isTalk( $page->page_namespace );
          return !$isTalk;
      }
  }
  689. /**
  690. * Dump output filter to include or exclude pages in a given set of namespaces.
  691. * @ingroup Dump
  692. */
  class DumpNamespaceFilter extends DumpFilter {
      # When true, listed namespaces are excluded instead of included
      var $invert = false;
      # Set of selected namespace ids, stored as array keys
      var $namespaces = array();

      /**
       * @param $sink mixed object receiving the filtered output
       * @param $param string comma-separated list of namespace constant
       *               names (e.g. "NS_MAIN") and/or numeric ids; a leading
       *               "!" inverts the selection
       * @throws MWException on a key that is neither a known constant
       *         name nor numeric
       */
      function __construct( &$sink, $param ) {
          parent::DumpFilter( $sink );
          $constants = array(
              "NS_MAIN" => NS_MAIN,
              "NS_TALK" => NS_TALK,
              "NS_USER" => NS_USER,
              "NS_USER_TALK" => NS_USER_TALK,
              "NS_PROJECT" => NS_PROJECT,
              "NS_PROJECT_TALK" => NS_PROJECT_TALK,
              "NS_FILE" => NS_FILE,
              "NS_FILE_TALK" => NS_FILE_TALK,
              "NS_IMAGE" => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
              "NS_IMAGE_TALK" => NS_IMAGE_TALK,
              "NS_MEDIAWIKI" => NS_MEDIAWIKI,
              "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
              "NS_TEMPLATE" => NS_TEMPLATE,
              "NS_TEMPLATE_TALK" => NS_TEMPLATE_TALK,
              "NS_HELP" => NS_HELP,
              "NS_HELP_TALK" => NS_HELP_TALK,
              "NS_CATEGORY" => NS_CATEGORY,
              "NS_CATEGORY_TALK" => NS_CATEGORY_TALK );

          // strncmp() instead of the removed $param{0} offset syntax; it is
          // also safe on an empty string, which then falls through to the
          // "unrecognized key" exception below exactly as before.
          if( strncmp( $param, '!', 1 ) === 0 ) {
              $this->invert = true;
              $param = substr( $param, 1 );
          }

          foreach( explode( ',', $param ) as $key ) {
              $key = trim( $key );
              if( isset( $constants[$key] ) ) {
                  $ns = $constants[$key];
                  $this->namespaces[$ns] = true;
              } elseif( is_numeric( $key ) ) {
                  $ns = intval( $key );
                  $this->namespaces[$ns] = true;
              } else {
                  throw new MWException( "Unrecognized namespace key '$key'\n" );
              }
          }
      }

      /**
       * Legacy constructor name, kept for backward compatibility. PHP 8 no
       * longer treats a method named after its class as the constructor.
       * @param $sink mixed
       * @param $param string
       */
      function DumpNamespaceFilter( &$sink, $param ) {
          self::__construct( $sink, $param );
      }

      /**
       * Lets a page through when its namespace is in the selected set,
       * or when it is not and the selection is inverted.
       * @param $page object
       * @return bool
       */
      function pass( $page ) {
          $match = isset( $this->namespaces[$page->page_namespace] );
          return $this->invert xor $match;
      }
  }
  739. /**
  740. * Dump output filter to include only the last revision in each page sequence.
  741. * @ingroup Dump
  742. */
  class DumpLatestFilter extends DumpFilter {
      # Page row and its opening XML, held back until the page closes
      var $page;
      var $pageString;
      # Matching revision row and its XML, if one has been seen
      var $rev;
      var $revString;

      /**
       * Remembers the page opening instead of forwarding it right away.
       * @param $page object
       * @param $string string
       */
      function writeOpenPage( $page, $string ) {
          $this->pageString = $string;
          $this->page = $page;
      }

      /**
       * Emits the held page opening, the remembered revision and the page
       * closing, but only if a revision was remembered. The held state
       * is cleared either way.
       * @param $string string
       */
      function writeClosePage( $string ) {
          $sink = $this->sink;
          if( $this->rev ) {
              $sink->writeOpenPage( $this->page, $this->pageString );
              $sink->writeRevision( $this->rev, $this->revString );
              $sink->writeClosePage( $string );
          }
          $this->rev = $this->revString = null;
          $this->page = $this->pageString = null;
      }

      /**
       * Remembers the revision whose rev_id equals the held page's
       * page_latest; other revisions are dropped.
       * @param $rev object
       * @param $string string
       */
      function writeRevision( $rev, $string ) {
          if( $rev->rev_id != $this->page->page_latest ) {
              return;
          }
          $this->rev = $rev;
          $this->revString = $string;
      }
  }
  767. /**
  768. * Output sink that forwards every dump event to each of several sinks.
  769. * @ingroup Dump
  770. */
  class DumpMultiWriter {
      # Sinks that every event is forwarded to, in array order
      public $sinks;
      # Number of sinks, as counted at construction time
      public $count;

      /**
       * @param $sinks array of sink objects
       */
      function __construct( $sinks ) {
          $this->sinks = $sinks;
          $this->count = count( $sinks );
      }

      /**
       * Legacy constructor name, kept for backward compatibility. PHP 8 no
       * longer treats a method named after its class as the constructor.
       * @param $sinks array
       */
      function DumpMultiWriter( $sinks ) {
          self::__construct( $sinks );
      }

      /**
       * Forwards the stream opening to every sink.
       * @param $string string
       */
      function writeOpenStream( $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeOpenStream( $string );
          }
      }

      /**
       * Forwards the stream closing to every sink.
       * @param $string string
       */
      function writeCloseStream( $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeCloseStream( $string );
          }
      }

      /**
       * Forwards the page opening to every sink.
       * @param $page object
       * @param $string string
       */
      function writeOpenPage( $page, $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeOpenPage( $page, $string );
          }
      }

      /**
       * Forwards the page closing to every sink.
       * @param $string string
       */
      function writeClosePage( $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeClosePage( $string );
          }
      }

      /**
       * Forwards the revision to every sink.
       * @param $rev object
       * @param $string string
       */
      function writeRevision( $rev, $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeRevision( $rev, $string );
          }
      }

      /**
       * Forwards the log item to every sink. Without this method the
       * writer could not be used as the sink of a log dump.
       * @param $rev object
       * @param $string string
       */
      function writeLogItem( $rev, $string ) {
          foreach( $this->sinks as $sink ) {
              $sink->writeLogItem( $rev, $string );
          }
      }
  }
  /**
   * Makes a string safe to embed in XML output: runs it through
   * UtfNormal::cleanUp() and then escapes it with htmlspecialchars().
   *
   * @param $string string
   * @return string
   */
  function xmlsafe( $string ) {
      $profileKey = 'xmlsafe';
      wfProfileIn( $profileKey );

      // The page may contain old data which has not been properly
      // normalized. Invalid UTF-8 sequences or forbidden control characters
      // will make our XML output invalid, so be sure to strip them out.
      $cleaned = UtfNormal::cleanUp( $string );
      $escaped = htmlspecialchars( $cleaned );

      wfProfileOut( $profileKey );
      return $escaped;
  }