populateImageSha1.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. <?php
  2. /**
  3. * Optional upgrade script to populate the img_sha1 field
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Maintenance
  22. */
  23. require_once __DIR__ . '/Maintenance.php';
  24. /**
  25. * Maintenance script to populate the img_sha1 field.
  26. *
  27. * @ingroup Maintenance
  28. */
  class PopulateImageSha1 extends LoggedUpdateMaintenance {
      /**
       * Register the script description and its command-line options.
       */
      public function __construct() {
          parent::__construct();
          $this->addDescription( 'Populate the img_sha1 field' );
          $this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
          $this->addOption( 'multiversiononly', "Calculate only for files with several versions" );
          $this->addOption( 'method', "Use 'pipe' to pipe to mysql command line,\n" .
              "\t\tdefault uses Database class", false, true );
          $this->addOption(
              'file',
              'Fix for a specific file, without File: namespace prefixed',
              false,
              true
          );
      }

      /**
       * Key string identifying this update; consumed by the
       * LoggedUpdateMaintenance base class (defined outside this file).
       *
       * @return string
       */
      protected function getUpdateKey() {
          return 'populate img_sha1';
      }

      /**
       * Message for the case where the update is skipped; consumed by the
       * LoggedUpdateMaintenance base class (defined outside this file).
       *
       * @return string
       */
      protected function updateSkippedMessage() {
          return 'img_sha1 column of image table already populated.';
      }

      /**
       * Entry point. Partial runs (--file or --multiversiononly) call
       * doDBUpdates() directly so that the parent's update-log handling is
       * bypassed; a full run is delegated to the parent implementation.
       */
      public function execute() {
          // Compare against '' (as doDBUpdates() does) rather than testing
          // truthiness, so that a file literally named "0" is still treated
          // as a single-file request.
          $singleFile = (string)$this->getOption( 'file', '' ) !== '';

          if ( $singleFile || $this->hasOption( 'multiversiononly' ) ) {
              $this->doDBUpdates(); // skip update log checks/saves
          } else {
              parent::execute();
          }
      }

      /**
       * Compute the SHA-1 of the selected files (current and old versions)
       * and write it to the image / oldimage tables.
       *
       * Which files are selected:
       *  - --file: only the named file;
       *  - --multiversiononly: every distinct name found in oldimage;
       *  - --force: every row of image;
       *  - otherwise: rows of image whose img_sha1 is empty.
       *
       * When a recalculation was requested (--force or --file) and the stored
       * hash differs from the one computed from disk, the whole row is
       * upgraded through the file object instead of patching only the hash.
       *
       * @return bool True when no specific file was requested, false when
       *   only a single file was handled (or it could not be found)
       */
      public function doDBUpdates() {
          $method = $this->getOption( 'method', 'normal' );
          // Keep the option value in its own variable: the loop below works
          // with file objects, and the return value must depend on the option
          // only.
          $fileName = (string)$this->getOption( 'file', '' );
          $force = $this->getOption( 'force' );
          $multiVersionOnly = $this->hasOption( 'multiversiononly' );
          $isRegen = ( $force || $fileName !== '' ); // forced recalculation?

          $t = -microtime( true );

          $dbw = $this->getDB( DB_MASTER );
          if ( $fileName !== '' ) {
              $res = $dbw->select(
                  'image',
                  [ 'img_name' ],
                  [ 'img_name' => $fileName ],
                  __METHOD__
              );
              // An empty result set is still an object, so the row count has
              // to be checked explicitly to detect an unknown file.
              if ( !$res || !$res->numRows() ) {
                  $this->error( "No such file: $fileName", true );
                  return false;
              }
              $this->output( "Populating img_sha1 field for specified files\n" );
          } elseif ( $multiVersionOnly ) {
              $this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
              $res = $dbw->select( 'oldimage',
                  [ 'img_name' => 'DISTINCT(oi_name)' ], [], __METHOD__ );
          } else {
              if ( $force ) {
                  $conds = [];
                  $this->output( "Populating and recalculating img_sha1 field\n" );
              } else {
                  $conds = [ 'img_sha1' => '' ];
                  $this->output( "Populating img_sha1 field\n" );
              }
              $res = $dbw->select( 'image', [ 'img_name' ], $conds, __METHOD__ );
          }

          $imageTable = $dbw->tableName( 'image' );
          $oldImageTable = $dbw->tableName( 'oldimage' );

          $pipe = null;
          if ( $method === 'pipe' ) {
              // Opening a pipe allows the SHA-1 operation to be done in parallel
              // with the database write operation, because the writes are queued
              // in the pipe buffer. This can improve performance by up to a
              // factor of 2.
              global $wgDBuser, $wgDBserver, $wgDBpassword, $wgDBname;
              // NOTE(review): the password is passed on the command line and
              // may therefore be visible in the process list.
              $cmd = 'mysql -u' . wfEscapeShellArg( $wgDBuser ) .
                  ' -h' . wfEscapeShellArg( $wgDBserver ) .
                  ' -p' . wfEscapeShellArg( $wgDBpassword, $wgDBname );
              $this->output( "Using pipe method\n" );
              $pipe = popen( $cmd, 'w' );
              if ( !$pipe ) {
                  // Without this check every later fwrite() would fail on a
                  // non-resource and no row would ever be updated.
                  $this->error( "Unable to open a pipe to the mysql command line client", true );
                  return false;
              }
          }

          // NOTE(review): this script never sets a batch size itself; fall
          // back to a fixed value so the modulo below cannot divide by zero
          // should mBatchSize be unset - confirm against the parent class.
          $batchSize = (int)$this->mBatchSize;
          if ( $batchSize <= 0 ) {
              $batchSize = 200;
          }

          $numRows = $res->numRows();
          $i = 0;
          foreach ( $res as $row ) {
              if ( $i % $batchSize == 0 ) {
                  $this->output( sprintf(
                      "Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
                  wfWaitForSlaves();
              }
              // Count the row up front so that rows skipped below still
              // advance the progress counter.
              $i++;

              $localFile = wfLocalFile( $row->img_name );
              if ( !$localFile ) {
                  continue;
              }

              // Upgrade the current file version...
              $sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
              if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
                  if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
                      // The population was probably done already. If the old SHA1
                      // does not match, then both fix the SHA1 and the metadata.
                      $localFile->upgradeRow();
                  } else {
                      $sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
                          " WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
                      $this->sendUpdate( $dbw, $sql, $pipe, __METHOD__ );
                  }
              }

              // Upgrade the old file versions...
              foreach ( $localFile->getHistory() as $oldFile ) {
                  $sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
                  if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
                      if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
                          // The population was probably done already. If the old SHA1
                          // does not match, then both fix the SHA1 and the metadata.
                          $oldFile->upgradeRow();
                      } else {
                          $sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
                              " WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
                              " oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
                          $this->sendUpdate( $dbw, $sql, $pipe, __METHOD__ );
                      }
                  }
              }
          }

          if ( $pipe ) {
              fflush( $pipe );
              pclose( $pipe );
          }

          $t += microtime( true );
          $this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

          // Decide from the --file option, not from whatever the loop last
          // looked at.
          return $fileName === ''; // we only updated *some* files, don't log
      }

      /**
       * Send one UPDATE statement either down the mysql pipe or through the
       * database handle.
       *
       * @param object $dbw Database handle used when no pipe is open
       * @param string $sql Statement text without a trailing semicolon
       * @param resource|null $pipe Open pipe to the mysql client, or null
       * @param string $fname Caller name passed along with the query
       */
      private function sendUpdate( $dbw, $sql, $pipe, $fname ) {
          if ( $pipe ) {
              fwrite( $pipe, "$sql;\n" );
          } else {
              $dbw->query( $sql, $fname );
          }
      }
  }
  // Name the class for the maintenance runner, then pull in the runner
  // bootstrap (RUN_MAINTENANCE_IF_MAIN is defined outside this file).
  $maintClass = 'PopulateImageSha1';
  require_once RUN_MAINTENANCE_IF_MAIN;