syncFileBackend.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. <?php
  2. /**
  3. * Sync one file backend to another based on the journal of the latter.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Maintenance
  22. */
  23. require_once __DIR__ . '/Maintenance.php';
  24. /**
  25. * Maintenance script that syncs one file backend to another based on
  26. * the journal of the latter.
  27. *
  28. * @ingroup Maintenance
  29. */
  30. class SyncFileBackend extends Maintenance {
  /**
   * Declare the command-line options this script accepts and set the
   * number of journal entries requested per batch to 50.
   */
  public function __construct() {
    parent::__construct();
    $this->addDescription( 'Sync one file backend with another using the journal' );

    // One row per addOption() call, in declaration order. Each row is passed
    // through verbatim as that call's argument list, so rows may differ in length.
    $optionSpecs = [
      [ 'src', 'Name of backend to sync from', true, true ],
      [ 'dst', 'Name of destination backend to sync', false, true ],
      [ 'start', 'Starting journal ID', false, true ],
      [ 'end', 'Ending journal ID', false, true ],
      [ 'posdir', 'Directory to read/record journal positions', false, true ],
      [ 'posdump', 'Just dump current journal position into the position dir.' ],
      [ 'postime', 'For position dumps, get the ID at this time', false, true ],
      [ 'backoff', 'Stop at entries younger than this age (sec).', false, true ],
      [ 'verbose', 'Verbose mode', false, false, 'v' ],
    ];
    foreach ( $optionSpecs as $spec ) {
      call_user_func_array( [ $this, 'addOption' ], $spec );
    }

    $this->setBatchSize( 50 );
  }
  /**
   * Script entry point.
   *
   * With --posdump, only records a journal position of the source backend
   * (the current one, or the one at --postime) in the position file and returns.
   *
   * Otherwise syncs the --dst backend from the --src journal. The starting ID
   * comes from --start or, failing that, from the position file (last handled
   * ID + 1). The ending ID comes from --backoff (position at "now - backoff")
   * or --end, and is unbounded when neither is given.
   *
   * In quiet mode a single number (the position) is printed for scripts.
   */
  public function execute() {
    $src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

    $posDir = $this->getOption( 'posdir' );
    $posFile = $posDir ? $posDir . '/' . wfWikiID() : false;

    if ( $this->hasOption( 'posdump' ) ) {
      // Just dump the current position into the specified position dir.
      // Checking $posFile (not just hasOption) also rejects an empty --posdir
      // value, which would otherwise lead to a write to an empty path.
      if ( $posFile === false ) {
        $this->error( "Param posdir required!", 1 );
      }

      if ( $this->hasOption( 'postime' ) ) {
        $id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
        $this->output( "Requested journal position is $id.\n" );
      } else {
        $id = (int)$src->getJournal()->getCurrentPosition();
        $this->output( "Current journal position is $id.\n" );
      }

      if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
        $this->output( "Saved journal position file.\n" );
      } else {
        // Use error() rather than output() so the failure is still reported
        // when the script runs in quiet mode.
        $this->error( "Could not save journal position file." );
      }

      if ( $this->isQuiet() ) {
        print $id; // give a single machine-readable number
      }

      return;
    }

    if ( !$this->hasOption( 'dst' ) ) {
      $this->error( "Param dst required!", 1 );
    }
    $dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

    // Option values arrive as strings; normalize to integers up front so the
    // comparisons and arithmetic below never operate on raw CLI text.
    $start = (int)$this->getOption( 'start', 0 );
    if ( !$start && $posFile && is_dir( $posDir ) ) {
      $start = is_file( $posFile )
        ? (int)trim( file_get_contents( $posFile ) )
        : 0;
      ++$start; // we already did this ID, start with the next one
      $startFromPosFile = true;
    } else {
      $startFromPosFile = false;
    }

    if ( $this->hasOption( 'backoff' ) ) {
      $time = time() - (int)$this->getOption( 'backoff', 0 );
      $end = (int)$src->getJournal()->getPositionAtTime( $time );
    } else {
      // INF means "no upper bound"
      $end = $this->hasOption( 'end' ) ? (int)$this->getOption( 'end' ) : INF;
    }

    $this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
    $this->output( "Starting journal position is $start.\n" );
    if ( is_finite( $end ) ) {
      $this->output( "Ending journal position is $end.\n" );
    }

    // Periodically update the position file. Only done when the start came
    // from that file, and only once the position has actually advanced.
    $callback = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
      if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
        file_put_contents( $posFile, $pos, LOCK_EX );
      }
    };

    // Actually sync the dest backend with the reference backend
    $lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );

    // Update the sync position file
    if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
      if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
        $this->output( "Updated journal position file.\n" );
      } else {
        // Reported via error() so it is not lost in quiet mode
        $this->error( "Could not update journal position file." );
      }
    }

    if ( $lastOKPos === false ) {
      if ( !$start ) {
        $this->output( "No journal entries found.\n" );
      } else {
        $this->output( "No new journal entries found.\n" );
      }
    } else {
      $this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
    }

    if ( $this->isQuiet() ) {
      print $lastOKPos; // give a single machine-readable number
    }
  }
  /**
   * Sync $dst backend to $src backend based on the $src journal entries
   * with IDs from $start through $end (inclusive).
   * Returns the journal entry ID this advanced to and handled (inclusive).
   *
   * Entries are fetched in batches of at most the configured batch size.
   * Processing stops at the first batch that fails to sync, so every entry up
   * to the returned ID has been handled. Entries whose ID lies beyond $end
   * are never synced, even if the journal returns them (IDs can have gaps,
   * so a count-based limit alone does not bound the ID).
   *
   * @param FileBackend $src
   * @param FileBackend $dst
   * @param int $start Starting journal position
   * @param int|float $end Ending journal position (INF for no upper bound)
   * @param Closure $callback Callback to update any position file
   * @return int|bool Journal entry ID (0 if the first batch failed) or false if there are none
   */
  protected function syncBackends(
    FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
  ) {
    $lastOKPos = 0; // failed
    $first = true; // first batch

    if ( $start > $end ) { // sanity
      $this->error( "Error: given starting ID greater than ending ID.", 1 );
    }

    $next = null;
    do {
      $limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go past ending ID
      $this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

      $entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
      $start = $next; // start where we left off next time

      $lastPosInBatch = 0;
      $pathsInBatch = []; // changed paths
      $entriesInRange = 0;
      $pastEnd = false;
      foreach ( $entries as $entry ) {
        if ( $entry['id'] > $end ) {
          // $limit caps the number of entries, not their IDs; with gaps in
          // the ID sequence the batch can reach beyond the requested end.
          $pastEnd = true;
          break;
        }
        ++$entriesInRange;
        if ( $entry['op'] !== 'null' ) { // null ops are just for reference
          $pathsInBatch[$entry['path']] = 1; // remove duplicates
        }
        $lastPosInBatch = $entry['id'];
      }

      if ( $first && !$entriesInRange ) {
        return false; // nothing to do
      }
      $first = false;

      $status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
      if ( $status->isOK() ) {
        $lastOKPos = max( $lastOKPos, $lastPosInBatch );
        $callback( $lastOKPos ); // update position file
      } else {
        $this->error( print_r( $status->getErrorsArray(), true ) );
        break; // no gaps; everything up to $lastOKPos must be OK
      }

      if ( !$start ) {
        $this->output( "End of journal entries.\n" );
      }
    } while ( $start && $start <= $end && !$pastEnd );

    return $lastOKPos;
  }
  /**
   * Make the given files in $dst match their current state in $src.
   *
   * For each path: a file that exists in $src and differs from the $dst copy
   * is stored over the $dst copy; a file that does not exist in $src is deleted
   * from $dst; a file that cannot be stat'ed aborts the batch with a fatal
   * status. All resulting operations are run together and retried once,
   * after a 10 second pause, if the first attempt fails.
   *
   * @param array $paths Storage paths to sync
   * @param FileBackend $src
   * @param FileBackend $dst
   * @return Status
   */
  protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
    $status = Status::newGood();
    if ( count( $paths ) === 0 ) {
      return $status; // nothing to do
    }

    // Same paths expressed under each backend's own name
    // (internal FileBackendMultiWrite names become the public one)
    $srcPaths = $this->replaceNamePaths( $paths, $src );
    $dstPaths = $this->replaceNamePaths( $paths, $dst );

    // Lock the live backend paths from modification. The lock handles are kept
    // in locals for the rest of the method; presumably they are released
    // once they go out of scope.
    $srcLock = $src->getScopedFileLocks( $srcPaths, LockManager::LOCK_UW, $status );
    $dstLock = $dst->getScopedFileLocks( $dstPaths, LockManager::LOCK_EX, $status );
    if ( !$status->isOK() ) {
      return $status;
    }

    $src->preloadFileStat( [ 'srcs' => $srcPaths, 'latest' => 1 ] );
    $dst->preloadFileStat( [ 'srcs' => $dstPaths, 'latest' => 1 ] );

    $ops = [];
    $localCopies = []; // keeps TempFSFile objects alive as needed
    foreach ( $srcPaths as $idx => $srcPath ) {
      $dstPath = $dstPaths[$idx];
      $existsInSrc = $src->fileExists( [ 'src' => $srcPath, 'latest' => 1 ] );

      if ( $existsInSrc === false ) {
        // Gone from the source: remove it from the destination too
        $ops[] = [ 'op' => 'delete', 'src' => $dstPath, 'ignoreMissingSource' => 1 ];
        continue;
      }

      if ( $existsInSrc !== true ) {
        // Neither true nor false: the existence check itself failed
        $this->error( "Unable to sync '$dstPath': could not stat file." );
        $status->fatal( 'backend-fail-internal', $src->getName() );

        return $status;
      }

      if ( $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) {
        continue; // avoid local copies for non-FS backends
      }

      // Note: getLocalReference() is fast for FS backends
      $localCopy = $src->getLocalReference( [ 'src' => $srcPath, 'latest' => 1 ] );
      if ( !$localCopy ) {
        $this->error( "Unable to sync '$dstPath': could not get local copy." );
        $status->fatal( 'backend-fail-internal', $src->getName() );

        return $status;
      }
      $localCopies[] = $localCopy;

      // Note: prepare() is usually fast for key/value backends
      $prepareParams = [ 'dir' => dirname( $dstPath ), 'bypassReadOnly' => 1 ];
      $status->merge( $dst->prepare( $prepareParams ) );
      if ( !$status->isOK() ) {
        return $status;
      }

      $ops[] = [
        'op' => 'store',
        'src' => $localCopy->getPath(),
        'dst' => $dstPath,
        'overwrite' => 1
      ];
    }

    $quickOpts = [ 'bypassReadOnly' => 1 ];
    $startedAt = microtime( true );
    $status = $dst->doQuickOperations( $ops, $quickOpts );
    if ( !$status->isOK() ) {
      sleep( 10 ); // wait and retry copy again
      $status = $dst->doQuickOperations( $ops, $quickOpts );
    }
    $elapsedMs = floor( ( microtime( true ) - $startedAt ) * 1000 );

    if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
      $this->output(
        "Synchronized these file(s) [{$elapsedMs}ms]:\n" . implode( "\n", $dstPaths ) . "\n"
      );
    }

    return $status;
  }
  /**
   * Rewrite storage paths so that they name the given backend.
   *
   * The leading "mwstore://<name>" part of each path is replaced with
   * "mwstore://" followed by $backend's name; the rest of the path is kept.
   *
   * @param array|string $paths List of paths or single string path
   * @param FileBackend $backend Backend whose name should appear in the paths
   * @return array|string Same shape as $paths
   */
  protected function replaceNamePaths( $paths, FileBackend $backend ) {
    $storagePrefixPattern = '!^mwstore://([^/]+)!';
    // Escaped so the backend name is inserted literally, not as a back-reference
    $replacement = StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() );

    // preg_replace() accepts either a single string or an array of strings
    return preg_replace( $storagePrefixPattern, $replacement, $paths );
  }
  /**
   * Check whether a source file and a destination file have the same content,
   * judged by file size first and SHA-1 (base 36) second.
   *
   * A size or hash that could not be obtained (false/null) never counts as a
   * match, so two unreadable files are not mistaken for identical ones and
   * the caller goes on to copy the file instead of skipping it.
   *
   * @param FileBackend $src
   * @param FileBackend $dst
   * @param string $sPath Storage path of the file in $src
   * @param string $dPath Storage path of the file in $dst
   * @return bool
   */
  protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
    $sSize = $src->getFileSize( [ 'src' => $sPath ] );
    if ( $sSize === false || $sSize === null ) {
      return false; // could not stat the source; do not claim equality
    }
    if ( $sSize !== $dst->getFileSize( [ 'src' => $dPath ] ) ) {
      return false; // short-circuit: no need to hash files of different size
    }

    $sSha1 = $src->getFileSha1Base36( [ 'src' => $sPath ] );
    if ( $sSha1 === false || $sSha1 === null ) {
      return false; // could not hash the source; do not claim equality
    }

    return $sSha1 === $dst->getFileSha1Base36( [ 'src' => $dPath ] );
  }
  270. }
  // Name the maintenance class defined in this file, then include the runner
  // (presumably it executes that class when this file is the script being run).
  $maintClass = 'SyncFileBackend';
  require_once RUN_MAINTENANCE_IF_MAIN;