SpecialExport.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. <?php
  2. # Copyright (C) 2003-2008 Brion Vibber <brion@pobox.com>
  3. # http://www.mediawiki.org/
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 2 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License along
  16. # with this program; if not, write to the Free Software Foundation, Inc.,
  17. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. # http://www.gnu.org/copyleft/gpl.html
  19. /**
  20. * @file
  21. * @ingroup SpecialPage
  22. */
  23. class SpecialExport extends SpecialPage {
  24. private $curonly, $doExport, $pageLinkDepth, $templates;
  25. private $images;
  /**
   * Set up the special page under the name 'Export' by delegating to the
   * parent constructor.
   */
  public function __construct() {
      $specialPageName = 'Export';
      parent::__construct( $specialPageName );
  }
  /**
   * Handle a request to the export page.
   *
   * Reads the request parameters, works out which pages and which history
   * range were asked for, and then either streams the XML export (when a
   * non-empty page list was submitted) or renders the export form.
   *
   * Four request shapes are distinguished, in this order:
   *  - 'addcat': append the members of a category to the page list and
   *    re-display the form;
   *  - 'addns' (only if $wgExportFromNamespaces): append the pages of a
   *    namespace to the page list and re-display the form;
   *  - a POST without a subpage parameter: full form submission;
   *  - anything else: page list taken from 'pages' or from $par.
   *
   * @param string $par Subpage portion of the request; used as the default
   *   page list for non-POST requests
   */
  public function execute( $par ) {
      global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
      global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
      global $wgExportFromNamespaces;

      $this->setHeaders();
      $this->outputHeader();

      // Set some variables
      $this->curonly = true;
      $this->doExport = false;
      $this->templates = $wgRequest->getCheck( 'templates' );
      $this->images = $wgRequest->getCheck( 'images' ); // Doesn't do anything yet
      $this->pageLinkDepth = $this->validateLinkDepth(
          $wgRequest->getIntOrNull( 'pagelink-depth' ) );

      if ( $wgRequest->getCheck( 'addcat' ) ) {
          $page = $wgRequest->getText( 'pages' );
          $catname = $wgRequest->getText( 'catname' );

          if ( $catname !== '' && $catname !== null && $catname !== false ) {
              $t = Title::makeTitleSafe( NS_MAIN, $catname );
              if ( $t ) {
                  /**
                   * @fixme This can lead to hitting memory limit for very large
                   * categories. Ideally we would do the lookup synchronously
                   * during the export in a single query.
                   */
                  $catpages = $this->getPagesFromCategory( $t );
                  if ( $catpages ) {
                      $page .= "\n" . implode( "\n", $catpages );
                  }
              }
          }
      } elseif ( $wgRequest->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
          $page = $wgRequest->getText( 'pages' );
          $nsindex = $wgRequest->getText( 'nsindex' );

          if ( $nsindex !== '' && $nsindex !== null && $nsindex !== false ) {
              /**
               * Same implementation as above, so same @fixme
               */
              $nspages = $this->getPagesFromNamespace( $nsindex );
              if ( $nspages ) {
                  $page .= "\n" . implode( "\n", $nspages );
              }
          }
      } elseif ( $wgRequest->wasPosted() && $par == '' ) {
          $page = $wgRequest->getText( 'pages' );
          $this->curonly = $wgRequest->getCheck( 'curonly' );
          $rawOffset = $wgRequest->getVal( 'offset' );
          if ( $rawOffset ) {
              $offset = wfTimestamp( TS_MW, $rawOffset );
          } else {
              $offset = null;
          }
          $limit = $wgRequest->getInt( 'limit' );
          $dir = $wgRequest->getVal( 'dir' );

          // Default history range; narrowed below from the request parameters.
          $history = array(
              'dir' => 'asc',
              'offset' => false,
              'limit' => $wgExportMaxHistory,
          );
          $historyCheck = $wgRequest->getCheck( 'history' );
          if ( $this->curonly ) {
              $history = WikiExporter::CURRENT;
          } elseif ( !$historyCheck ) {
              // A zero $wgExportMaxHistory means no cap is configured, so any
              // positive requested limit must be honoured in that case rather
              // than being compared against 0 and silently dropped.
              if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
                  $history['limit'] = $limit;
              }
              if ( !is_null( $offset ) ) {
                  $history['offset'] = $offset;
              }
              if ( strtolower( $dir ) == 'desc' ) {
                  $history['dir'] = 'desc';
              }
          }

          if ( $page != '' ) {
              $this->doExport = true;
          }
      } else {
          // Default to current-only for GET requests
          $page = $wgRequest->getText( 'pages', $par );
          $historyCheck = $wgRequest->getCheck( 'history' );
          if ( $historyCheck ) {
              $history = WikiExporter::FULL;
          } else {
              $history = WikiExporter::CURRENT;
          }

          if ( $page != '' ) {
              $this->doExport = true;
          }
      }

      if ( !$wgExportAllowHistory ) {
          // Override
          $history = WikiExporter::CURRENT;
      }

      // Contributor lists are only produced for current-revision exports and
      // only when the site configuration allows them.
      $list_authors = $wgRequest->getCheck( 'listauthors' );
      if ( !$this->curonly || !$wgExportAllowListContributors ) {
          $list_authors = false;
      }

      if ( $this->doExport ) {
          $wgOut->disable();

          // Cancel output buffering and gzipping if set
          // This should provide safer streaming for pages with history
          wfResetOutputBuffers();
          // Both headers go through the response object so they are sent
          // the same way.
          $wgRequest->response()->header( "Content-type: application/xml; charset=utf-8" );
          if ( $wgRequest->getCheck( 'wpDownload' ) ) {
              // Provide a sane filename suggestion
              $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
              $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
          }
          $this->doExport( $page, $history, $list_authors );
          return;
      }

      // No export requested (or nothing to export): show the form, pre-filled
      // with whatever page list has been accumulated so far.
      $wgOut->addWikiMsg( 'exporttext' );

      $form = Xml::openElement( 'form', array( 'method' => 'post',
          'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
      $form .= Xml::inputLabel( wfMsg( 'export-addcattext' ), 'catname', 'catname', 40 ) . '&nbsp;';
      $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';

      if ( $wgExportFromNamespaces ) {
          $form .= Xml::namespaceSelector( '', null, 'nsindex', wfMsg( 'export-addnstext' ) ) . '&nbsp;';
          $form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
      }

      $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
      $form .= '<br />';

      if ( $wgExportAllowHistory ) {
          $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
      } else {
          $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
      }
      $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
      if ( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
          $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
      }
      // Enable this when we can do something useful exporting/importing image information. :)
      //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
      $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';

      $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
      $form .= Xml::closeElement( 'form' );
      $wgOut->addHTML( $form );
  }
  /**
   * Tell whether the current user holds the 'override-export-depth' right.
   *
   * @return mixed Whatever $wgUser->isAllowed() reports for that right
   */
  private function userCanOverrideExportDepth() {
      global $wgUser;

      $mayOverride = $wgUser->isAllowed( 'override-export-depth' );
      return $mayOverride;
  }
  /**
   * Do the actual page exporting.
   *
   * The newline-separated input is split into page names, optionally expanded
   * with used templates and linked pages (depending on the options read in
   * execute()), and every resulting page the user may read is written to the
   * export stream exactly once.
   *
   * @param string $page User input on what page(s) to export
   * @param mixed $history one of the WikiExporter history export constants,
   *   or the history range array built in execute()
   * @param bool $list_authors Value assigned to the exporter's list_authors
   *   property
   */
  private function doExport( $page, $history, $list_authors ) {
      /* Split up the input and look up linked pages */
      $inputPages = array_filter( explode( "\n", $page ), array( $this, 'filterPage' ) );
      $pageSet = array_flip( $inputPages );

      if ( $this->templates ) {
          $pageSet = $this->getTemplates( $inputPages, $pageSet );
      }

      $linkDepth = $this->pageLinkDepth;
      if ( $linkDepth ) {
          $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
      }

      // Image expansion ($this->images / getImages()) is intentionally not
      // applied here: nothing useful can be done with image information yet.

      $pages = array_keys( $pageSet );

      /* Ok, let's get to it... */
      if ( $history == WikiExporter::CURRENT ) {
          $lb = false;
          $db = wfGetDB( DB_SLAVE );
          $buffer = WikiExporter::BUFFER;
      } else {
          // Use an unbuffered query; histories may be very long!
          $lb = wfGetLBFactory()->newMainLB();
          $db = $lb->getConnection( DB_SLAVE );
          $buffer = WikiExporter::STREAM;

          // This might take a while... :D
          wfSuppressWarnings();
          set_time_limit( 0 );
          wfRestoreWarnings();
      }

      $exporter = new WikiExporter( $db, $history, $buffer );
      $exporter->list_authors = $list_authors;
      $exporter->openStream();

      // $pageSet can hold several spellings of one page (the raw user input
      // next to the normalized title added by link expansion, or two inputs
      // that differ only in normalization). Track the normalized titles
      // already written so each page is emitted a single time.
      $exported = array();
      foreach ( $pages as $pageName ) {
          # Bug 8824: Only export pages the user can read
          $title = Title::newFromText( $pageName );
          if ( is_null( $title ) ) {
              continue; # TODO: perhaps output an <error> tag or something.
          }
          if ( !$title->userCanRead() ) {
              continue; # TODO: perhaps output an <error> tag or something.
          }

          $normalized = $title->getPrefixedText();
          if ( isset( $exported[$normalized] ) ) {
              continue;
          }
          $exported[$normalized] = true;

          $exporter->pageByTitle( $title );
      }

      $exporter->closeStream();

      // Only the dedicated load balancer created for streaming needs closing.
      if ( $lb ) {
          $lb->closeAll();
      }
  }
  /**
   * List the pages that are members of a category.
   *
   * Only the DB key of $title is used to match categorylinks.cl_to; at most
   * 5000 rows are fetched.
   *
   * @param $title Title object whose DB key names the category
   * @return array list of page names, prefixed with the content-language
   *   namespace name and a colon when the namespace number is non-zero
   */
  private function getPagesFromCategory( $title ) {
      global $wgContLang;

      $categoryKey = $title->getDBkey();
      $dbr = wfGetDB( DB_SLAVE );

      $tables = array( 'page', 'categorylinks' );
      $columns = array( 'page_namespace', 'page_title' );
      $conds = array( 'cl_from=page_id', 'cl_to' => $categoryKey );
      $options = array( 'LIMIT' => '5000' );
      $res = $dbr->select( $tables, $columns, $conds, __METHOD__, $options );

      $names = array();
      while ( $row = $dbr->fetchObject( $res ) ) {
          if ( $row->page_namespace ) {
              $names[] = $wgContLang->getNsText( $row->page_namespace ) . ':' . $row->page_title;
          } else {
              $names[] = $row->page_title;
          }
      }
      $dbr->freeResult( $res );

      return $names;
  }
  /**
   * List the pages that live in a given namespace.
   *
   * At most 5000 rows are fetched.
   *
   * @param $nsindex mixed namespace number matched against page.page_namespace
   * @return array list of page names, prefixed with the content-language
   *   namespace name and a colon when the namespace number is non-zero
   */
  private function getPagesFromNamespace( $nsindex ) {
      global $wgContLang;

      $dbr = wfGetDB( DB_SLAVE );

      $columns = array( 'page_namespace', 'page_title' );
      $conds = array( 'page_namespace' => $nsindex );
      $options = array( 'LIMIT' => '5000' );
      $res = $dbr->select( 'page', $columns, $conds, __METHOD__, $options );

      $names = array();
      while ( $row = $dbr->fetchObject( $res ) ) {
          if ( $row->page_namespace ) {
              $names[] = $wgContLang->getNsText( $row->page_namespace ) . ':' . $row->page_title;
          } else {
              $names[] = $row->page_title;
          }
      }
      $dbr->freeResult( $res );

      return $names;
  }
  /**
   * Add the templates used by a list of pages to the output set.
   *
   * Thin wrapper around getLinks() for the templatelinks table.
   *
   * @param $inputPages array, list of titles to look up
   * @param $pageSet array, associative array indexed by titles for output
   * @return array associative array indexed by titles
   */
  private function getTemplates( $inputPages, $pageSet ) {
      $table = 'templatelinks';
      $fields = array( 'tl_namespace AS namespace', 'tl_title AS title' );
      $join = array( 'page_id=tl_from' );

      return $this->getLinks( $inputPages, $pageSet, $table, $fields, $join );
  }
  /**
   * Clamp a requested link depth to the permitted range.
   *
   * Negative values become 0. Users who cannot override the export depth
   * are limited to $wgExportMaxLinkDepth. Whatever remains is capped at 5
   * and converted to an integer.
   *
   * @param $depth mixed requested depth, as read from the request
   * @return mixed the usable depth
   */
  private function validateLinkDepth( $depth ) {
      // $wgExportMaxLinkDepthLimit is declared here but not read by this method.
      global $wgExportMaxLinkDepth, $wgExportMaxLinkDepthLimit;

      if ( $depth < 0 ) {
          return 0;
      }

      $isRestricted = !$this->userCanOverrideExportDepth();
      if ( $isRestricted && $depth > $wgExportMaxLinkDepth ) {
          return $wgExportMaxLinkDepth;
      }

      /*
       * There's a HARD CODED limit of 5 levels of recursion here to prevent a
       * crazy-big export from being done by someone setting the depth
       * number too high. In other words, last resort safety net.
       */
      $capped = min( $depth, 5 );
      return intval( $capped );
  }
  /**
   * Expand a list of pages to include pages linked to from those pages,
   * following links up to $depth levels.
   *
   * getLinks() runs one database query per input page, so each level only
   * looks up pages that were not already looked up at an earlier level;
   * their links are in $pageSet already, which makes the result the same as
   * re-querying the whole set on every level.
   *
   * @param $inputPages array, list of titles to start from
   * @param $pageSet array, associative array indexed by titles for output
   * @param $depth int number of link levels to follow
   * @return array associative array indexed by titles
   */
  private function getPageLinks( $inputPages, $pageSet, $depth ) {
      // Names that have already been passed to getLinks().
      $queried = array();

      for ( ; $depth > 0; --$depth ) {
          $pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks',
              array( 'pl_namespace AS namespace', 'pl_title AS title' ),
              array( 'page_id=pl_from' ) );

          foreach ( $inputPages as $name ) {
              $queried[$name] = true;
          }

          // Next level: only the entries of the set not yet looked up.
          $inputPages = array_keys( array_diff_key( $pageSet, $queried ) );
          if ( !$inputPages ) {
              // Nothing new was discovered; deeper levels cannot add anything.
              break;
          }
      }

      return $pageSet;
  }
  /**
   * Add the images used by a list of pages to the output set.
   *
   * Thin wrapper around getLinks() for the imagelinks table; every target
   * is reported in the NS_FILE namespace.
   *
   * @param $inputPages array, list of titles to look up
   * @param $pageSet array, associative array indexed by titles for output
   * @return array associative array indexed by titles
   */
  private function getImages( $inputPages, $pageSet ) {
      $table = 'imagelinks';
      $fields = array( NS_FILE . ' AS namespace', 'il_to AS title' );
      $join = array( 'page_id=il_from' );

      return $this->getLinks( $inputPages, $pageSet, $table, $fields, $join );
  }
  /**
   * Expand a list of pages to include items used in those pages.
   *
   * For every input name that parses to a title, the title itself and each
   * row found by joining the page table with $table are added to $pageSet,
   * keyed by prefixed title text. Names that do not parse are skipped.
   *
   * @param $inputPages array, list of titles to look up
   * @param $pageSet array, associative array indexed by titles for output
   * @param $table string name of the link table to join with page
   * @param $fields array, select list; must yield 'namespace' and 'title'
   * @param $join array, join condition(s) between page and $table
   * @return array associative array indexed by titles
   * @private
   */
  private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
      $dbr = wfGetDB( DB_SLAVE );

      foreach ( $inputPages as $pageName ) {
          $source = Title::newFromText( $pageName );
          if ( !$source ) {
              continue;
          }

          $pageSet[$source->getPrefixedText()] = true;

          /// @fixme May or may not be more efficient to batch these
          /// by namespace when given multiple input pages.
          $pageConds = array(
              'page_namespace' => $source->getNamespace(),
              'page_title' => $source->getDBkey()
          );
          $rows = $dbr->select(
              array( 'page', $table ),
              $fields,
              array_merge( $join, $pageConds ),
              __METHOD__
          );

          foreach ( $rows as $row ) {
              $target = Title::makeTitle( $row->namespace, $row->title );
              $pageSet[$target->getPrefixedText()] = true;
          }
      }

      return $pageSet;
  }
  /**
   * array_filter() callback: keep an entry unless it is the empty string
   * or null.
   *
   * @param $page mixed entry of the pages array
   * @return bool true if the entry should be kept
   */
  private function filterPage( $page ) {
      $isBlank = ( $page === '' || $page === null );
      return !$isBlank;
  }
  353. }