Import.php 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137
  1. <?php
  2. /**
  3. * MediaWiki page data importer
  4. * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
  5. * http://www.mediawiki.org/
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. * http://www.gnu.org/copyleft/gpl.html
  21. *
  22. * @file
  23. * @ingroup SpecialPage
  24. */
  25. /**
  26. *
  27. * @ingroup SpecialPage
  28. */
  29. class WikiRevision {
  30. var $title = null;
  31. var $id = 0;
  32. var $timestamp = "20010115000000";
  33. var $user = 0;
  34. var $user_text = "";
  35. var $text = "";
  36. var $comment = "";
  37. var $minor = false;
  38. var $type = "";
  39. var $action = "";
  40. var $params = "";
  41. function setTitle( $title ) {
  42. if( is_object( $title ) ) {
  43. $this->title = $title;
  44. } elseif( is_null( $title ) ) {
  45. throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
  46. } else {
  47. throw new MWException( "WikiRevision given non-object title in import." );
  48. }
  49. }
  50. function setID( $id ) {
  51. $this->id = $id;
  52. }
  53. function setTimestamp( $ts ) {
  54. # 2003-08-05T18:30:02Z
  55. $this->timestamp = wfTimestamp( TS_MW, $ts );
  56. }
  57. function setUsername( $user ) {
  58. $this->user_text = $user;
  59. }
  60. function setUserIP( $ip ) {
  61. $this->user_text = $ip;
  62. }
  63. function setText( $text ) {
  64. $this->text = $text;
  65. }
  66. function setComment( $text ) {
  67. $this->comment = $text;
  68. }
  69. function setMinor( $minor ) {
  70. $this->minor = (bool)$minor;
  71. }
  72. function setSrc( $src ) {
  73. $this->src = $src;
  74. }
  75. function setFilename( $filename ) {
  76. $this->filename = $filename;
  77. }
  78. function setSize( $size ) {
  79. $this->size = intval( $size );
  80. }
  81. function setType( $type ) {
  82. $this->type = $type;
  83. }
  84. function setAction( $action ) {
  85. $this->action = $action;
  86. }
  87. function setParams( $params ) {
  88. $this->params = $params;
  89. }
  90. function getTitle() {
  91. return $this->title;
  92. }
  93. function getID() {
  94. return $this->id;
  95. }
  96. function getTimestamp() {
  97. return $this->timestamp;
  98. }
  99. function getUser() {
  100. return $this->user_text;
  101. }
  102. function getText() {
  103. return $this->text;
  104. }
  105. function getComment() {
  106. return $this->comment;
  107. }
  108. function getMinor() {
  109. return $this->minor;
  110. }
  111. function getSrc() {
  112. return $this->src;
  113. }
  114. function getFilename() {
  115. return $this->filename;
  116. }
  117. function getSize() {
  118. return $this->size;
  119. }
  120. function getType() {
  121. return $this->type;
  122. }
  123. function getAction() {
  124. return $this->action;
  125. }
  126. function getParams() {
  127. return $this->params;
  128. }
  129. function importOldRevision() {
  130. $dbw = wfGetDB( DB_MASTER );
  131. # Sneak a single revision into place
  132. $user = User::newFromName( $this->getUser() );
  133. if( $user ) {
  134. $userId = intval( $user->getId() );
  135. $userText = $user->getName();
  136. } else {
  137. $userId = 0;
  138. $userText = $this->getUser();
  139. }
  140. // avoid memory leak...?
  141. $linkCache = LinkCache::singleton();
  142. $linkCache->clear();
  143. $article = new Article( $this->title );
  144. $pageId = $article->getId();
  145. if( $pageId == 0 ) {
  146. # must create the page...
  147. $pageId = $article->insertOn( $dbw );
  148. $created = true;
  149. } else {
  150. $created = false;
  151. $prior = $dbw->selectField( 'revision', '1',
  152. array( 'rev_page' => $pageId,
  153. 'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
  154. 'rev_user_text' => $userText,
  155. 'rev_comment' => $this->getComment() ),
  156. __METHOD__
  157. );
  158. if( $prior ) {
  159. // FIXME: this could fail slightly for multiple matches :P
  160. wfDebug( __METHOD__ . ": skipping existing revision for [[" .
  161. $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
  162. return false;
  163. }
  164. }
  165. # FIXME: Use original rev_id optionally (better for backups)
  166. # Insert the row
  167. $revision = new Revision( array(
  168. 'page' => $pageId,
  169. 'text' => $this->getText(),
  170. 'comment' => $this->getComment(),
  171. 'user' => $userId,
  172. 'user_text' => $userText,
  173. 'timestamp' => $this->timestamp,
  174. 'minor_edit' => $this->minor,
  175. ) );
  176. $revId = $revision->insertOn( $dbw );
  177. $changed = $article->updateIfNewerOn( $dbw, $revision );
  178. # To be on the safe side...
  179. $tempTitle = $GLOBALS['wgTitle'];
  180. $GLOBALS['wgTitle'] = $this->title;
  181. if( $created ) {
  182. wfDebug( __METHOD__ . ": running onArticleCreate\n" );
  183. Article::onArticleCreate( $this->title );
  184. wfDebug( __METHOD__ . ": running create updates\n" );
  185. $article->createUpdates( $revision );
  186. } elseif( $changed ) {
  187. wfDebug( __METHOD__ . ": running onArticleEdit\n" );
  188. Article::onArticleEdit( $this->title );
  189. wfDebug( __METHOD__ . ": running edit updates\n" );
  190. $article->editUpdates(
  191. $this->getText(),
  192. $this->getComment(),
  193. $this->minor,
  194. $this->timestamp,
  195. $revId );
  196. }
  197. $GLOBALS['wgTitle'] = $tempTitle;
  198. return true;
  199. }
  200. function importLogItem() {
  201. $dbw = wfGetDB( DB_MASTER );
  202. # FIXME: this will not record autoblocks
  203. if( !$this->getTitle() ) {
  204. wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
  205. $this->timestamp . "\n" );
  206. return;
  207. }
  208. # Check if it exists already
  209. // FIXME: use original log ID (better for backups)
  210. $prior = $dbw->selectField( 'logging', '1',
  211. array( 'log_type' => $this->getType(),
  212. 'log_action' => $this->getAction(),
  213. 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
  214. 'log_namespace' => $this->getTitle()->getNamespace(),
  215. 'log_title' => $this->getTitle()->getDBkey(),
  216. 'log_comment' => $this->getComment(),
  217. #'log_user_text' => $this->user_text,
  218. 'log_params' => $this->params ),
  219. __METHOD__
  220. );
  221. // FIXME: this could fail slightly for multiple matches :P
  222. if( $prior ) {
  223. wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
  224. $this->timestamp . "\n" );
  225. return false;
  226. }
  227. $log_id = $dbw->nextSequenceValue( 'log_log_id_seq' );
  228. $data = array(
  229. 'log_id' => $log_id,
  230. 'log_type' => $this->type,
  231. 'log_action' => $this->action,
  232. 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
  233. 'log_user' => User::idFromName( $this->user_text ),
  234. #'log_user_text' => $this->user_text,
  235. 'log_namespace' => $this->getTitle()->getNamespace(),
  236. 'log_title' => $this->getTitle()->getDBkey(),
  237. 'log_comment' => $this->getComment(),
  238. 'log_params' => $this->params
  239. );
  240. $dbw->insert( 'logging', $data, __METHOD__ );
  241. }
  242. function importUpload() {
  243. wfDebug( __METHOD__ . ": STUB\n" );
  244. /**
  245. // from file revert...
  246. $source = $this->file->getArchiveVirtualUrl( $this->oldimage );
  247. $comment = $wgRequest->getText( 'wpComment' );
  248. // TODO: Preserve file properties from database instead of reloading from file
  249. $status = $this->file->upload( $source, $comment, $comment );
  250. if( $status->isGood() ) {
  251. */
  252. /**
  253. // from file upload...
  254. $this->mLocalFile = wfLocalFile( $nt );
  255. $this->mDestName = $this->mLocalFile->getName();
  256. //....
  257. $status = $this->mLocalFile->upload( $this->mTempPath, $this->mComment, $pageText,
  258. File::DELETE_SOURCE, $this->mFileProps );
  259. if ( !$status->isGood() ) {
  260. $resultDetails = array( 'internal' => $status->getWikiText() );
  261. */
  262. // @fixme upload() uses $wgUser, which is wrong here
  263. // it may also create a page without our desire, also wrong potentially.
  264. // and, it will record a *current* upload, but we might want an archive version here
  265. $file = wfLocalFile( $this->getTitle() );
  266. if( !$file ) {
  267. var_dump( $file );
  268. wfDebug( "IMPORT: Bad file. :(\n" );
  269. return false;
  270. }
  271. $source = $this->downloadSource();
  272. if( !$source ) {
  273. wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
  274. return false;
  275. }
  276. $status = $file->upload( $source,
  277. $this->getComment(),
  278. $this->getComment(), // Initial page, if none present...
  279. File::DELETE_SOURCE,
  280. false, // props...
  281. $this->getTimestamp() );
  282. if( $status->isGood() ) {
  283. // yay?
  284. wfDebug( "IMPORT: is ok?\n" );
  285. return true;
  286. }
  287. wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
  288. return false;
  289. }
  290. function downloadSource() {
  291. global $wgEnableUploads;
  292. if( !$wgEnableUploads ) {
  293. return false;
  294. }
  295. $tempo = tempnam( wfTempDir(), 'download' );
  296. $f = fopen( $tempo, 'wb' );
  297. if( !$f ) {
  298. wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
  299. return false;
  300. }
  301. // @fixme!
  302. $src = $this->getSrc();
  303. $data = Http::get( $src );
  304. if( !$data ) {
  305. wfDebug( "IMPORT: couldn't fetch source $src\n" );
  306. fclose( $f );
  307. unlink( $tempo );
  308. return false;
  309. }
  310. fwrite( $f, $data );
  311. fclose( $f );
  312. return $tempo;
  313. }
  314. }
  315. /**
  316. * implements Special:Import
  317. * @ingroup SpecialPage
  318. */
  319. class WikiImporter {
  320. var $mDebug = false;
  321. var $mSource = null;
  322. var $mPageCallback = null;
  323. var $mPageOutCallback = null;
  324. var $mRevisionCallback = null;
  325. var $mLogItemCallback = null;
  326. var $mUploadCallback = null;
  327. var $mTargetNamespace = null;
  328. var $mXmlNamespace = false;
  329. var $lastfield;
  330. var $tagStack = array();
  331. function __construct( $source ) {
  332. $this->setRevisionCallback( array( $this, "importRevision" ) );
  333. $this->setUploadCallback( array( $this, "importUpload" ) );
  334. $this->setLogItemCallback( array( $this, "importLogItem" ) );
  335. $this->mSource = $source;
  336. }
  337. function throwXmlError( $err ) {
  338. $this->debug( "FAILURE: $err" );
  339. wfDebug( "WikiImporter XML error: $err\n" );
  340. }
  341. function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
  342. if( preg_match( '/www.mediawiki.org/',$prefix ) ) {
  343. $prefix = str_replace( '/','\/',$prefix );
  344. $this->mXmlNamespace='/^'.$prefix.':/';
  345. }
  346. }
  347. function stripXmlNamespace($name) {
  348. if( $this->mXmlNamespace ) {
  349. return(preg_replace($this->mXmlNamespace,'',$name,1));
  350. }
  351. else {
  352. return($name);
  353. }
  354. }
  355. # --------------
  356. function doImport() {
  357. if( empty( $this->mSource ) ) {
  358. return new WikiErrorMsg( "importnotext" );
  359. }
  360. $parser = xml_parser_create_ns( "UTF-8" );
  361. # case folding violates XML standard, turn it off
  362. xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
  363. xml_set_object( $parser, $this );
  364. xml_set_element_handler( $parser, "in_start", "" );
  365. xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
  366. $offset = 0; // for context extraction on error reporting
  367. do {
  368. $chunk = $this->mSource->readChunk();
  369. if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
  370. wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
  371. return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
  372. }
  373. $offset += strlen( $chunk );
  374. } while( $chunk !== false && !$this->mSource->atEnd() );
  375. xml_parser_free( $parser );
  376. return true;
  377. }
  378. function debug( $data ) {
  379. if( $this->mDebug ) {
  380. wfDebug( "IMPORT: $data\n" );
  381. }
  382. }
  383. function notice( $data ) {
  384. global $wgCommandLineMode;
  385. if( $wgCommandLineMode ) {
  386. print "$data\n";
  387. } else {
  388. global $wgOut;
  389. $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
  390. }
  391. }
  392. /**
  393. * Set debug mode...
  394. */
  395. function setDebug( $debug ) {
  396. $this->mDebug = $debug;
  397. }
  398. /**
  399. * Sets the action to perform as each new page in the stream is reached.
  400. * @param $callback callback
  401. * @return callback
  402. */
  403. function setPageCallback( $callback ) {
  404. $previous = $this->mPageCallback;
  405. $this->mPageCallback = $callback;
  406. return $previous;
  407. }
  408. /**
  409. * Sets the action to perform as each page in the stream is completed.
  410. * Callback accepts the page title (as a Title object), a second object
  411. * with the original title form (in case it's been overridden into a
  412. * local namespace), and a count of revisions.
  413. *
  414. * @param $callback callback
  415. * @return callback
  416. */
  417. function setPageOutCallback( $callback ) {
  418. $previous = $this->mPageOutCallback;
  419. $this->mPageOutCallback = $callback;
  420. return $previous;
  421. }
  422. /**
  423. * Sets the action to perform as each page revision is reached.
  424. * @param $callback callback
  425. * @return callback
  426. */
  427. function setRevisionCallback( $callback ) {
  428. $previous = $this->mRevisionCallback;
  429. $this->mRevisionCallback = $callback;
  430. return $previous;
  431. }
  432. /**
  433. * Sets the action to perform as each file upload version is reached.
  434. * @param $callback callback
  435. * @return callback
  436. */
  437. function setUploadCallback( $callback ) {
  438. $previous = $this->mUploadCallback;
  439. $this->mUploadCallback = $callback;
  440. return $previous;
  441. }
  442. /**
  443. * Sets the action to perform as each log item reached.
  444. * @param $callback callback
  445. * @return callback
  446. */
  447. function setLogItemCallback( $callback ) {
  448. $previous = $this->mLogItemCallback;
  449. $this->mLogItemCallback = $callback;
  450. return $previous;
  451. }
  452. /**
  453. * Set a target namespace to override the defaults
  454. */
  455. function setTargetNamespace( $namespace ) {
  456. if( is_null( $namespace ) ) {
  457. // Don't override namespaces
  458. $this->mTargetNamespace = null;
  459. } elseif( $namespace >= 0 ) {
  460. // FIXME: Check for validity
  461. $this->mTargetNamespace = intval( $namespace );
  462. } else {
  463. return false;
  464. }
  465. }
  466. /**
  467. * Default per-revision callback, performs the import.
  468. * @param $revision WikiRevision
  469. * @private
  470. */
  471. function importRevision( $revision ) {
  472. $dbw = wfGetDB( DB_MASTER );
  473. return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
  474. }
  475. /**
  476. * Default per-revision callback, performs the import.
  477. * @param $revision WikiRevision
  478. * @private
  479. */
  480. function importLogItem( $rev ) {
  481. $dbw = wfGetDB( DB_MASTER );
  482. return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
  483. }
  484. /**
  485. * Dummy for now...
  486. */
  487. function importUpload( $revision ) {
  488. //$dbw = wfGetDB( DB_MASTER );
  489. //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
  490. return false;
  491. }
  492. /**
  493. * Alternate per-revision callback, for debugging.
  494. * @param $revision WikiRevision
  495. * @private
  496. */
  497. function debugRevisionHandler( &$revision ) {
  498. $this->debug( "Got revision:" );
  499. if( is_object( $revision->title ) ) {
  500. $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
  501. } else {
  502. $this->debug( "-- Title: <invalid>" );
  503. }
  504. $this->debug( "-- User: " . $revision->user_text );
  505. $this->debug( "-- Timestamp: " . $revision->timestamp );
  506. $this->debug( "-- Comment: " . $revision->comment );
  507. $this->debug( "-- Text: " . $revision->text );
  508. }
  509. /**
  510. * Notify the callback function when a new <page> is reached.
  511. * @param $title Title
  512. * @private
  513. */
  514. function pageCallback( $title ) {
  515. if( is_callable( $this->mPageCallback ) ) {
  516. call_user_func( $this->mPageCallback, $title );
  517. }
  518. }
  519. /**
  520. * Notify the callback function when a </page> is closed.
  521. * @param $title Title
  522. * @param $origTitle Title
  523. * @param $revisionCount int
  524. * @param $successCount Int: number of revisions for which callback returned true
  525. * @private
  526. */
  527. function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
  528. if( is_callable( $this->mPageOutCallback ) ) {
  529. call_user_func( $this->mPageOutCallback, $title, $origTitle,
  530. $revisionCount, $successCount );
  531. }
  532. }
  533. # XML parser callbacks from here out -- beware!
  534. function donothing( $parser, $x, $y="" ) {
  535. #$this->debug( "donothing" );
  536. }
  537. function in_start( $parser, $name, $attribs ) {
  538. $name = $this->stripXmlNamespace($name);
  539. $this->debug( "in_start $name" );
  540. if( $name != "mediawiki" ) {
  541. return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
  542. }
  543. xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
  544. }
  545. function in_mediawiki( $parser, $name, $attribs ) {
  546. $name = $this->stripXmlNamespace($name);
  547. $this->debug( "in_mediawiki $name" );
  548. if( $name == 'siteinfo' ) {
  549. xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
  550. } elseif( $name == 'page' ) {
  551. $this->push( $name );
  552. $this->workRevisionCount = 0;
  553. $this->workSuccessCount = 0;
  554. $this->uploadCount = 0;
  555. $this->uploadSuccessCount = 0;
  556. xml_set_element_handler( $parser, "in_page", "out_page" );
  557. } elseif( $name == 'logitem' ) {
  558. $this->push( $name );
  559. $this->workRevision = new WikiRevision;
  560. xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
  561. } else {
  562. return $this->throwXMLerror( "Expected <page>, got <$name>" );
  563. }
  564. }
  565. function out_mediawiki( $parser, $name ) {
  566. $name = $this->stripXmlNamespace($name);
  567. $this->debug( "out_mediawiki $name" );
  568. if( $name != "mediawiki" ) {
  569. return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
  570. }
  571. xml_set_element_handler( $parser, "donothing", "donothing" );
  572. }
  573. function in_siteinfo( $parser, $name, $attribs ) {
  574. // no-ops for now
  575. $name = $this->stripXmlNamespace($name);
  576. $this->debug( "in_siteinfo $name" );
  577. switch( $name ) {
  578. case "sitename":
  579. case "base":
  580. case "generator":
  581. case "case":
  582. case "namespaces":
  583. case "namespace":
  584. break;
  585. default:
  586. return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
  587. }
  588. }
  589. function out_siteinfo( $parser, $name ) {
  590. $name = $this->stripXmlNamespace($name);
  591. if( $name == "siteinfo" ) {
  592. xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
  593. }
  594. }
  595. function in_page( $parser, $name, $attribs ) {
  596. $name = $this->stripXmlNamespace($name);
  597. $this->debug( "in_page $name" );
  598. switch( $name ) {
  599. case "id":
  600. case "title":
  601. case "restrictions":
  602. $this->appendfield = $name;
  603. $this->appenddata = "";
  604. xml_set_element_handler( $parser, "in_nothing", "out_append" );
  605. xml_set_character_data_handler( $parser, "char_append" );
  606. break;
  607. case "revision":
  608. $this->push( "revision" );
  609. if( is_object( $this->pageTitle ) ) {
  610. $this->workRevision = new WikiRevision;
  611. $this->workRevision->setTitle( $this->pageTitle );
  612. $this->workRevisionCount++;
  613. } else {
  614. // Skipping items due to invalid page title
  615. $this->workRevision = null;
  616. }
  617. xml_set_element_handler( $parser, "in_revision", "out_revision" );
  618. break;
  619. case "upload":
  620. $this->push( "upload" );
  621. if( is_object( $this->pageTitle ) ) {
  622. $this->workRevision = new WikiRevision;
  623. $this->workRevision->setTitle( $this->pageTitle );
  624. $this->uploadCount++;
  625. } else {
  626. // Skipping items due to invalid page title
  627. $this->workRevision = null;
  628. }
  629. xml_set_element_handler( $parser, "in_upload", "out_upload" );
  630. break;
  631. default:
  632. return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
  633. }
  634. }
  635. function out_page( $parser, $name ) {
  636. $name = $this->stripXmlNamespace($name);
  637. $this->debug( "out_page $name" );
  638. $this->pop();
  639. if( $name != "page" ) {
  640. return $this->throwXMLerror( "Expected </page>, got </$name>" );
  641. }
  642. xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
  643. $this->pageOutCallback( $this->pageTitle, $this->origTitle,
  644. $this->workRevisionCount, $this->workSuccessCount );
  645. $this->workTitle = null;
  646. $this->workRevision = null;
  647. $this->workRevisionCount = 0;
  648. $this->workSuccessCount = 0;
  649. $this->pageTitle = null;
  650. $this->origTitle = null;
  651. }
  652. function in_nothing( $parser, $name, $attribs ) {
  653. $name = $this->stripXmlNamespace($name);
  654. $this->debug( "in_nothing $name" );
  655. return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
  656. }
  657. function char_append( $parser, $data ) {
  658. $this->debug( "char_append '$data'" );
  659. $this->appenddata .= $data;
  660. }
  661. function out_append( $parser, $name ) {
  662. $name = $this->stripXmlNamespace($name);
  663. $this->debug( "out_append $name" );
  664. if( $name != $this->appendfield ) {
  665. return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
  666. }
  667. switch( $this->appendfield ) {
  668. case "title":
  669. $this->workTitle = $this->appenddata;
  670. $this->origTitle = Title::newFromText( $this->workTitle );
  671. if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
  672. $this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
  673. $this->origTitle->getDBkey() );
  674. } else {
  675. $this->pageTitle = Title::newFromText( $this->workTitle );
  676. }
  677. if( is_null( $this->pageTitle ) ) {
  678. // Invalid page title? Ignore the page
  679. $this->notice( "Skipping invalid page title '$this->workTitle'" );
  680. } elseif( $this->pageTitle->getInterwiki() != '' ) {
  681. $this->notice( "Skipping interwiki page title '$this->workTitle'" );
  682. $this->pageTitle = null;
  683. } else {
  684. $this->pageCallback( $this->workTitle );
  685. }
  686. break;
  687. case "id":
  688. if ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' ) {
  689. if( $this->workRevision )
  690. $this->workRevision->setID( $this->appenddata );
  691. }
  692. break;
  693. case "text":
  694. if( $this->workRevision )
  695. $this->workRevision->setText( $this->appenddata );
  696. break;
  697. case "username":
  698. if( $this->workRevision )
  699. $this->workRevision->setUsername( $this->appenddata );
  700. break;
  701. case "ip":
  702. if( $this->workRevision )
  703. $this->workRevision->setUserIP( $this->appenddata );
  704. break;
  705. case "timestamp":
  706. if( $this->workRevision )
  707. $this->workRevision->setTimestamp( $this->appenddata );
  708. break;
  709. case "comment":
  710. if( $this->workRevision )
  711. $this->workRevision->setComment( $this->appenddata );
  712. break;
  713. case "type":
  714. if( $this->workRevision )
  715. $this->workRevision->setType( $this->appenddata );
  716. break;
  717. case "action":
  718. if( $this->workRevision )
  719. $this->workRevision->setAction( $this->appenddata );
  720. break;
  721. case "logtitle":
  722. if( $this->workRevision )
  723. $this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
  724. break;
  725. case "params":
  726. if( $this->workRevision )
  727. $this->workRevision->setParams( $this->appenddata );
  728. break;
  729. case "minor":
  730. if( $this->workRevision )
  731. $this->workRevision->setMinor( true );
  732. break;
  733. case "filename":
  734. if( $this->workRevision )
  735. $this->workRevision->setFilename( $this->appenddata );
  736. break;
  737. case "src":
  738. if( $this->workRevision )
  739. $this->workRevision->setSrc( $this->appenddata );
  740. break;
  741. case "size":
  742. if( $this->workRevision )
  743. $this->workRevision->setSize( intval( $this->appenddata ) );
  744. break;
  745. default:
  746. $this->debug( "Bad append: {$this->appendfield}" );
  747. }
  748. $this->appendfield = "";
  749. $this->appenddata = "";
  750. $parent = $this->parentTag();
  751. xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
  752. xml_set_character_data_handler( $parser, "donothing" );
  753. }
  754. function in_revision( $parser, $name, $attribs ) {
  755. $name = $this->stripXmlNamespace($name);
  756. $this->debug( "in_revision $name" );
  757. switch( $name ) {
  758. case "id":
  759. case "timestamp":
  760. case "comment":
  761. case "minor":
  762. case "text":
  763. $this->appendfield = $name;
  764. xml_set_element_handler( $parser, "in_nothing", "out_append" );
  765. xml_set_character_data_handler( $parser, "char_append" );
  766. break;
  767. case "contributor":
  768. $this->push( "contributor" );
  769. xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
  770. break;
  771. default:
  772. return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
  773. }
  774. }
  775. function out_revision( $parser, $name ) {
  776. $name = $this->stripXmlNamespace($name);
  777. $this->debug( "out_revision $name" );
  778. $this->pop();
  779. if( $name != "revision" ) {
  780. return $this->throwXMLerror( "Expected </revision>, got </$name>" );
  781. }
  782. xml_set_element_handler( $parser, "in_page", "out_page" );
  783. if( $this->workRevision ) {
  784. $ok = call_user_func_array( $this->mRevisionCallback,
  785. array( $this->workRevision, $this ) );
  786. if( $ok ) {
  787. $this->workSuccessCount++;
  788. }
  789. }
  790. }
  791. function in_logitem( $parser, $name, $attribs ) {
  792. $name = $this->stripXmlNamespace($name);
  793. $this->debug( "in_logitem $name" );
  794. switch( $name ) {
  795. case "id":
  796. case "timestamp":
  797. case "comment":
  798. case "type":
  799. case "action":
  800. case "logtitle":
  801. case "params":
  802. $this->appendfield = $name;
  803. xml_set_element_handler( $parser, "in_nothing", "out_append" );
  804. xml_set_character_data_handler( $parser, "char_append" );
  805. break;
  806. case "contributor":
  807. $this->push( "contributor" );
  808. xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
  809. break;
  810. default:
  811. return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
  812. }
  813. }
  814. function out_logitem( $parser, $name ) {
  815. $name = $this->stripXmlNamespace($name);
  816. $this->debug( "out_logitem $name" );
  817. $this->pop();
  818. if( $name != "logitem" ) {
  819. return $this->throwXMLerror( "Expected </logitem>, got </$name>" );
  820. }
  821. xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
  822. if( $this->workRevision ) {
  823. $ok = call_user_func_array( $this->mLogItemCallback,
  824. array( $this->workRevision, $this ) );
  825. if( $ok ) {
  826. $this->workSuccessCount++;
  827. }
  828. }
  829. }
  830. function in_upload( $parser, $name, $attribs ) {
  831. $name = $this->stripXmlNamespace($name);
  832. $this->debug( "in_upload $name" );
  833. switch( $name ) {
  834. case "timestamp":
  835. case "comment":
  836. case "text":
  837. case "filename":
  838. case "src":
  839. case "size":
  840. $this->appendfield = $name;
  841. xml_set_element_handler( $parser, "in_nothing", "out_append" );
  842. xml_set_character_data_handler( $parser, "char_append" );
  843. break;
  844. case "contributor":
  845. $this->push( "contributor" );
  846. xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
  847. break;
  848. default:
  849. return $this->throwXMLerror( "Element <$name> not allowed in an <upload>." );
  850. }
  851. }
  852. function out_upload( $parser, $name ) {
  853. $name = $this->stripXmlNamespace($name);
  854. $this->debug( "out_revision $name" );
  855. $this->pop();
  856. if( $name != "upload" ) {
  857. return $this->throwXMLerror( "Expected </upload>, got </$name>" );
  858. }
  859. xml_set_element_handler( $parser, "in_page", "out_page" );
  860. if( $this->workRevision ) {
  861. $ok = call_user_func_array( $this->mUploadCallback,
  862. array( $this->workRevision, $this ) );
  863. if( $ok ) {
  864. $this->workUploadSuccessCount++;
  865. }
  866. }
  867. }
  868. function in_contributor( $parser, $name, $attribs ) {
  869. $name = $this->stripXmlNamespace($name);
  870. $this->debug( "in_contributor $name" );
  871. switch( $name ) {
  872. case "username":
  873. case "ip":
  874. case "id":
  875. $this->appendfield = $name;
  876. xml_set_element_handler( $parser, "in_nothing", "out_append" );
  877. xml_set_character_data_handler( $parser, "char_append" );
  878. break;
  879. default:
  880. $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
  881. }
  882. }
  883. function out_contributor( $parser, $name ) {
  884. $name = $this->stripXmlNamespace($name);
  885. $this->debug( "out_contributor $name" );
  886. $this->pop();
  887. if( $name != "contributor" ) {
  888. return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
  889. }
  890. $parent = $this->parentTag();
  891. xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
  892. }
  893. private function push( $name ) {
  894. array_push( $this->tagStack, $name );
  895. $this->debug( "PUSH $name" );
  896. }
  897. private function pop() {
  898. $name = array_pop( $this->tagStack );
  899. $this->debug( "POP $name" );
  900. return $name;
  901. }
  902. private function parentTag() {
  903. $name = $this->tagStack[count( $this->tagStack ) - 1];
  904. $this->debug( "PARENT $name" );
  905. return $name;
  906. }
  907. }
  908. /**
  909. * @todo document (e.g. one-sentence class description).
  910. * @ingroup SpecialPage
  911. */
  912. class ImportStringSource {
  913. function __construct( $string ) {
  914. $this->mString = $string;
  915. $this->mRead = false;
  916. }
  917. function atEnd() {
  918. return $this->mRead;
  919. }
  920. function readChunk() {
  921. if( $this->atEnd() ) {
  922. return false;
  923. } else {
  924. $this->mRead = true;
  925. return $this->mString;
  926. }
  927. }
  928. }
  929. /**
  930. * @todo document (e.g. one-sentence class description).
  931. * @ingroup SpecialPage
  932. */
  933. class ImportStreamSource {
  934. function __construct( $handle ) {
  935. $this->mHandle = $handle;
  936. }
  937. function atEnd() {
  938. return feof( $this->mHandle );
  939. }
  940. function readChunk() {
  941. return fread( $this->mHandle, 32768 );
  942. }
  943. static function newFromFile( $filename ) {
  944. $file = @fopen( $filename, 'rt' );
  945. if( !$file ) {
  946. return new WikiErrorMsg( "importcantopen" );
  947. }
  948. return new ImportStreamSource( $file );
  949. }
  950. static function newFromUpload( $fieldname = "xmlimport" ) {
  951. $upload =& $_FILES[$fieldname];
  952. if( !isset( $upload ) || !$upload['name'] ) {
  953. return new WikiErrorMsg( 'importnofile' );
  954. }
  955. if( !empty( $upload['error'] ) ) {
  956. switch($upload['error']){
  957. case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
  958. return new WikiErrorMsg( 'importuploaderrorsize' );
  959. case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
  960. return new WikiErrorMsg( 'importuploaderrorsize' );
  961. case 3: # The uploaded file was only partially uploaded
  962. return new WikiErrorMsg( 'importuploaderrorpartial' );
  963. case 6: #Missing a temporary folder. Introduced in PHP 4.3.10 and PHP 5.0.3.
  964. return new WikiErrorMsg( 'importuploaderrortemp' );
  965. # case else: # Currently impossible
  966. }
  967. }
  968. $fname = $upload['tmp_name'];
  969. if( is_uploaded_file( $fname ) ) {
  970. return ImportStreamSource::newFromFile( $fname );
  971. } else {
  972. return new WikiErrorMsg( 'importnofile' );
  973. }
  974. }
  975. static function newFromURL( $url, $method = 'GET' ) {
  976. wfDebug( __METHOD__ . ": opening $url\n" );
  977. # Use the standard HTTP fetch function; it times out
  978. # quicker and sorts out user-agent problems which might
  979. # otherwise prevent importing from large sites, such
  980. # as the Wikimedia cluster, etc.
  981. $data = Http::request( $method, $url );
  982. if( $data !== false ) {
  983. $file = tmpfile();
  984. fwrite( $file, $data );
  985. fflush( $file );
  986. fseek( $file, 0 );
  987. return new ImportStreamSource( $file );
  988. } else {
  989. return new WikiErrorMsg( 'importcantopen' );
  990. }
  991. }
  992. public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
  993. if( $page == '' ) {
  994. return new WikiErrorMsg( 'import-noarticle' );
  995. }
  996. $link = Title::newFromText( "$interwiki:Special:Export/$page" );
  997. if( is_null( $link ) || $link->getInterwiki() == '' ) {
  998. return new WikiErrorMsg( 'importbadinterwiki' );
  999. } else {
  1000. $params = array();
  1001. if ( $history ) $params['history'] = 1;
  1002. if ( $templates ) $params['templates'] = 1;
  1003. if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth;
  1004. $url = $link->getFullUrl( $params );
  1005. # For interwikis, use POST to avoid redirects.
  1006. return ImportStreamSource::newFromURL( $url, "POST" );
  1007. }
  1008. }
  1009. }