preprocessDump.php 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. <?php
  2. /**
  3. * Take page text out of an XML dump file and preprocess it to obj.
  4. * It may be useful for getting preprocessor statistics or filling the
  5. * preprocessor cache.
  6. *
  7. * Copyright © 2011 Platonides - https://www.mediawiki.org/
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22. * http://www.gnu.org/copyleft/gpl.html
  23. *
  24. * @file
  25. * @ingroup Maintenance
  26. */
  27. require_once __DIR__ . '/dumpIterator.php';
  28. /**
  29. * Maintenance script that takes page text out of an XML dump file and
  30. * preprocesses it to obj.
  31. *
  32. * @ingroup Maintenance
  33. */
  34. class PreprocessDump extends DumpIterator {
  35. /* Variables for dressing up as a parser */
  36. public $mTitle = 'PreprocessDump';
  37. public $mPPNodeCount = 0;
  38. public function getStripList() {
  39. global $wgParser;
  40. return $wgParser->getStripList();
  41. }
  42. public function __construct() {
  43. parent::__construct();
  44. $this->addOption( 'cache', 'Use and populate the preprocessor cache.', false, false );
  45. $this->addOption( 'preprocessor', 'Preprocessor to use.', false, false );
  46. }
  47. public function getDbType() {
  48. return Maintenance::DB_NONE;
  49. }
  50. public function checkOptions() {
  51. global $wgParser, $wgParserConf, $wgPreprocessorCacheThreshold;
  52. if ( !$this->hasOption( 'cache' ) ) {
  53. $wgPreprocessorCacheThreshold = false;
  54. }
  55. if ( $this->hasOption( 'preprocessor' ) ) {
  56. $name = $this->getOption( 'preprocessor' );
  57. } elseif ( isset( $wgParserConf['preprocessorClass'] ) ) {
  58. $name = $wgParserConf['preprocessorClass'];
  59. } else {
  60. $name = 'Preprocessor_DOM';
  61. }
  62. $wgParser->firstCallInit();
  63. $this->mPreprocessor = new $name( $this );
  64. }
  65. /**
  66. * Callback function for each revision, preprocessToObj()
  67. * @param Revision $rev
  68. */
  69. public function processRevision( $rev ) {
  70. $content = $rev->getContent( Revision::RAW );
  71. if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
  72. return;
  73. }
  74. try {
  75. $this->mPreprocessor->preprocessToObj( strval( $content->getNativeData() ), 0 );
  76. } catch ( Exception $e ) {
  77. $this->error( "Caught exception " . $e->getMessage() . " in "
  78. . $rev->getTitle()->getPrefixedText() );
  79. }
  80. }
  81. }
  82. $maintClass = "PreprocessDump";
  83. require_once RUN_MAINTENANCE_IF_MAIN;