LocalRepo.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. <?php
  2. /**
  3. * Local repository that stores files in the local filesystem and registers them
  4. * in the wiki's own database.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with this program; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. * http://www.gnu.org/copyleft/gpl.html
  20. *
  21. * @file
  22. * @ingroup FileRepo
  23. */
  24. use Wikimedia\Rdbms\ResultWrapper;
  25. use Wikimedia\Rdbms\Database;
  26. use Wikimedia\Rdbms\IDatabase;
  27. /**
  28. * A repository that stores files in the local filesystem and registers them
  29. * in the wiki's own database. This is the most commonly used repository class.
  30. *
  31. * @ingroup FileRepo
  32. */
  33. class LocalRepo extends FileRepo {
  /** @var callable Callback LocalFile::newFromTitle */
  protected $fileFactory = [ LocalFile::class, 'newFromTitle' ];
  /** @var callable Callback LocalFile::newFromKey */
  protected $fileFactoryKey = [ LocalFile::class, 'newFromKey' ];
  /** @var callable Callback LocalFile::newFromRow; used by newFileFromRow() for img_* rows */
  protected $fileFromRowFactory = [ LocalFile::class, 'newFromRow' ];
  /** @var callable Callback OldLocalFile::newFromRow; used by newFileFromRow() for oi_* rows */
  protected $oldFileFromRowFactory = [ OldLocalFile::class, 'newFromRow' ];
  /** @var callable Callback OldLocalFile::newFromTitle */
  protected $oldFileFactory = [ OldLocalFile::class, 'newFromTitle' ];
  /** @var callable Callback OldLocalFile::newFromKey */
  protected $oldFileFactoryKey = [ OldLocalFile::class, 'newFromKey' ];

  /**
   * Set up the repository.
   *
   * After the parent constructor has run, the repository is flagged as using
   * sha1 storage when $info['storageLayout'] is exactly 'sha1'. In that case
   * the backend is replaced by a FileBackendDBRepoWrapper around the original
   * backend.
   *
   * @param array|null $info Repository settings, handed to FileRepo::__construct()
   */
  function __construct( array $info = null ) {
    parent::__construct( $info );

    $usesSha1Layout = isset( $info['storageLayout'] ) && $info['storageLayout'] === 'sha1';
    $this->hasSha1Storage = $usesSha1Layout;

    if ( !$this->hasSha1Storage() ) {
      return;
    }

    $wrapperConfig = [
      'backend' => $this->backend,
      'repoName' => $this->name,
      'dbHandleFactory' => $this->getDBFactory()
    ];
    $this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
  }
  /**
   * Build a file object from a database row.
   *
   * A row with img_name set goes to the current-file row factory, a row with
   * oi_name set goes to the old-file row factory.
   *
   * @param stdClass $row
   * @return LocalFile
   * @throws MWException If the row has neither img_name nor oi_name
   */
  function newFileFromRow( $row ) {
    if ( isset( $row->img_name ) ) {
      $factory = $this->fileFromRowFactory;
    } elseif ( isset( $row->oi_name ) ) {
      $factory = $this->oldFileFromRowFactory;
    } else {
      throw new MWException( __METHOD__ . ': invalid row' );
    }

    return call_user_func( $factory, $row, $this );
  }
  /**
   * Create an old-version file object for this repository from a title and
   * an archive name, via OldLocalFile::newFromArchiveName().
   *
   * @param Title $title
   * @param string $archiveName
   * @return OldLocalFile
   */
  function newFromArchiveName( $title, $archiveName ) {
    $oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

    return $oldFile;
  }
  /**
   * Remove files from the 'deleted' zone whose storage key is no longer
   * referenced by a deleted (filearchive) or hidden (oldimage) file.
   *
   * Each key is handled inside its own atomic section on the master DB: both
   * reference checks run with row locking, and the backend delete is issued
   * only when neither check finds a reference. Nothing is done when the
   * repository uses sha1 storage.
   *
   * Counters on the returned status: failCount is incremented for each failed
   * backend delete, successCount for each key that is still in use. A
   * successful backend delete changes neither counter.
   *
   * @param string[] $storageKeys
   *
   * @return Status
   */
  function cleanupDeletedBatch( array $storageKeys ) {
    if ( $this->hasSha1Storage() ) {
      wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

      return Status::newGood();
    }

    $fileBackend = $this->backend;
    $deletedRoot = $this->getZonePath( 'deleted' );
    $dbw = $this->getMasterDB();
    $status = $this->newGood();

    foreach ( array_unique( $storageKeys ) as $key ) {
      $path = $deletedRoot . '/' . $this->getDeletedHashPath( $key ) . $key;

      $dbw->startAtomic( __METHOD__ );

      // Run both checks unconditionally: each one also locks the matching
      // rows so the key cannot be picked up again before we are finished.
      $deleted = $this->deletedFileHasKey( $key, 'lock' );
      $hidden = $this->hiddenFileHasKey( $key, 'lock' );

      if ( $deleted || $hidden ) {
        wfDebug( __METHOD__ . ": $key still in use\n" );
        $status->successCount++;
      } else {
        wfDebug( __METHOD__ . ": deleting $key\n" );
        $opStatus = $fileBackend->doOperation( [ 'op' => 'delete', 'src' => $path ] );
        if ( !$opStatus->isOK() ) {
          $status->error( 'undelete-cleanup-error', $path );
          $status->failCount++;
        }
      }

      $dbw->endAtomic( __METHOD__ );
    }

    return $status;
  }
  /**
   * Tell whether any filearchive row in storage group 'deleted' uses this
   * storage key. The lookup runs on the master DB.
   *
   * @param string $key File storage key (base-36 sha1 key with file extension)
   * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
   * @return bool True if a row with this key exists
   */
  protected function deletedFileHasKey( $key, $lock = null ) {
    $conds = [ 'fa_storage_group' => 'deleted', 'fa_storage_key' => $key ];

    $options = [];
    if ( $lock === 'lock' ) {
      $options[] = 'FOR UPDATE';
    }

    $found = $this->getMasterDB()->selectField( 'filearchive', '1', $conds, __METHOD__, $options );

    return (bool)$found;
  }
  /**
   * Tell whether any oldimage row with the file-deleted flag set uses this
   * storage key. The lookup runs on the master DB.
   *
   * The key is split at its first dot: the part before it is matched against
   * oi_sha1, and the normalized part after it must be the extension at the
   * end of oi_archive_name.
   *
   * @param string $key File storage key (base-36 sha1 key with file extension)
   * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
   * @return bool True if a row with this key exists
   */
  protected function hiddenFileHasKey( $key, $lock = null ) {
    $options = [];
    if ( $lock === 'lock' ) {
      $options[] = 'FOR UPDATE';
    }

    $sha1 = self::getHashFromKey( $key );
    $firstDot = strcspn( $key, '.' );
    $ext = File::normalizeExtension( substr( $key, $firstDot + 1 ) );

    $dbw = $this->getMasterDB();
    $conds = [
      'oi_sha1' => $sha1,
      'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ),
      $dbw->bitAnd( 'oi_deleted', File::DELETED_FILE ) => File::DELETED_FILE
    ];
    $found = $dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $options );

    return (bool)$found;
  }
  /**
   * Gets the SHA1 hash from a storage key
   *
   * The hash is everything before the first dot of the key. A key without a
   * dot is returned whole; a key that is empty or starts with a dot yields
   * an empty string.
   *
   * @param string $key
   * @return string
   */
  public static function getHashFromKey( $key ) {
    // explode() rather than strtok(): strtok() skips leading delimiters (so
    // ".png" would give "png" as the hash), returns false for an empty key,
    // and overwrites the process-wide tokenizer state.
    return explode( '.', (string)$key, 2 )[0];
  }
  /**
   * Check whether the page named $title is a redirect to a file page.
   *
   * The answer is cached in the main WAN cache. When a shared cache key is
   * available the entry is kept for a day, otherwise a local key is used and
   * the entry is kept for five minutes. A miss is cached as an empty string.
   *
   * @param Title $title Title of file
   * @return bool|Title Result of Title::newFromText() for the redirect
   *   target, or false if there is no redirect
   */
  function checkRedirect( Title $title ) {
    $title = File::normalizeTitle( $title, 'exception' );
    $keyHash = md5( $title->getDBkey() );

    $expiry = 86400; // has invalidation, 1 day
    $memcKey = $this->getSharedCacheKey( 'image_redirect', $keyHash );
    if ( $memcKey === false ) {
      $memcKey = $this->getLocalCacheKey( 'image_redirect', $keyHash );
      $expiry = 300; // no invalidation, 5 minutes
    }

    // Captured here because __METHOD__ inside the closure names the closure
    $method = __METHOD__;
    $loadRedirectTarget = function ( $oldValue, &$ttl, array &$setOpts ) use ( $method, $title ) {
      $dbr = $this->getReplicaDB(); // possibly remote DB
      $setOpts += Database::getCacheSetOptions( $dbr );

      $row = false;
      if ( $title instanceof Title ) {
        $row = $dbr->selectRow(
          [ 'page', 'redirect' ],
          [ 'rd_namespace', 'rd_title' ],
          [
            'page_namespace' => $title->getNamespace(),
            'page_title' => $title->getDBkey(),
            'rd_from = page_id'
          ],
          $method
        );
      }

      if ( !$row || $row->rd_namespace != NS_FILE ) {
        return ''; // negative cache
      }

      return Title::makeTitle( $row->rd_namespace, $row->rd_title )->getDBkey();
    };

    $redirDbKey = ObjectCache::getMainWANInstance()->getWithSetCallback(
      $memcKey,
      $expiry,
      $loadRedirectTarget,
      [ 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
    );

    // @note: also checks " " for b/c
    $hasRedirect = $redirDbKey !== ' ' && strval( $redirDbKey ) !== '';

    return $hasRedirect
      ? Title::newFromText( $redirDbKey, NS_FILE ) // page redirects to another file
      : false; // no redirect
  }
  /**
   * Find many files at once.
   *
   * Each item is either something File::normalizeTitle() accepts, or an
   * associative array of search parameters. The keys read here are:
   *   - title:          Title of the file (read for every array item)
   *   - time:           Timestamp of the wanted version; empty means the
   *                     current version
   *   - ignoreRedirect: If not empty, redirects are not followed for this item
   *   - private:        If not empty, versions with the file-deleted flag are
   *                     eligible; if it is a User object, the permission check
   *                     is made against that user
   *
   * Lookup order: the image table, then the oldimage table for remaining
   * items that ask for a specific time, then redirects for whatever is left.
   *
   * @param array $items Titles or search-parameter arrays
   * @param int $flags Bitfield; with FileRepo::NAME_AND_TIME_ONLY the values
   *   of the result are [ 'title' => ..., 'timestamp' => ... ] arrays
   * @return array Map of (searched DB key => File object or title/timestamp array)
   */
  public function findFiles( array $items, $flags = 0 ) {
    $finalFiles = []; // map of (DB key => corresponding File) for matches
    $searchSet = []; // map of (normalized DB key => search params)
    foreach ( $items as $item ) {
      if ( is_array( $item ) ) {
        $title = File::normalizeTitle( $item['title'] );
        if ( $title ) {
          $searchSet[$title->getDBkey()] = $item;
        }
      } else {
        $title = File::normalizeTitle( $item );
        if ( $title ) {
          $searchSet[$title->getDBkey()] = [];
        }
      }
    }

    // Decides whether a file version satisfies the search parameters of one item
    $fileMatchesSearch = function ( File $file, array $search ) {
      // Note: file name comparison done elsewhere (to handle redirects)
      $user = ( !empty( $search['private'] ) && $search['private'] instanceof User )
        ? $search['private']
        : null;

      return (
        $file->exists() &&
        (
          ( empty( $search['time'] ) && !$file->isOld() ) ||
          ( !empty( $search['time'] ) && $search['time'] === $file->getTimestamp() )
        ) &&
        ( !empty( $search['private'] ) || !$file->isDeleted( File::DELETED_FILE ) ) &&
        $file->userCan( File::DELETED_FILE, $user )
      );
    };

    // Moves every row that answers a pending search from $searchSet to $finalFiles
    $applyMatchingFiles = function ( ResultWrapper $res, &$searchSet, &$finalFiles )
      use ( $fileMatchesSearch, $flags )
    {
      global $wgContLang;
      $info = $this->getInfo();
      foreach ( $res as $row ) {
        $file = $this->newFileFromRow( $row );
        // There must have been a search for this DB key, but this has to handle the
        // cases where title capitalization is different on the client and repo wikis.
        $dbKeysLook = [ strtr( $file->getName(), ' ', '_' ) ];
        if ( !empty( $info['initialCapital'] ) ) {
          // Search keys for "hi.png" and "Hi.png" should use the "Hi.png" file
          $dbKeysLook[] = $wgContLang->lcfirst( $file->getName() );
        }
        foreach ( $dbKeysLook as $dbKey ) {
          if ( isset( $searchSet[$dbKey] )
            && $fileMatchesSearch( $file, $searchSet[$dbKey] )
          ) {
            $finalFiles[$dbKey] = ( $flags & FileRepo::NAME_AND_TIME_ONLY )
              ? [ 'title' => $dbKey, 'timestamp' => $file->getTimestamp() ]
              : $file;
            unset( $searchSet[$dbKey] );
          }
        }
      }
    };

    $dbr = $this->getReplicaDB();

    // Query image table
    $imgNames = [];
    foreach ( array_keys( $searchSet ) as $dbKey ) {
      $imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
    }
    if ( count( $imgNames ) ) {
      $fileQuery = LocalFile::getQueryInfo();
      $res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'], [ 'img_name' => $imgNames ],
        __METHOD__, [], $fileQuery['joins'] );
      $applyMatchingFiles( $res, $searchSet, $finalFiles );
    }

    // Query old image table
    $oiConds = []; // WHERE clause array for each file
    foreach ( $searchSet as $dbKey => $search ) {
      // Use the same emptiness test as $fileMatchesSearch: an empty 'time'
      // (such as false) asks for the current version, which no oldimage row
      // can satisfy, so querying for it would only waste a lookup.
      if ( !empty( $search['time'] ) ) {
        $oiConds[] = $dbr->makeList(
          [
            'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
            'oi_timestamp' => $dbr->timestamp( $search['time'] )
          ],
          LIST_AND
        );
      }
    }
    if ( count( $oiConds ) ) {
      $fileQuery = OldLocalFile::getQueryInfo();
      $res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'],
        $dbr->makeList( $oiConds, LIST_OR ),
        __METHOD__, [], $fileQuery['joins'] );
      $applyMatchingFiles( $res, $searchSet, $finalFiles );
    }

    // Check for redirects...
    foreach ( $searchSet as $dbKey => $search ) {
      if ( !empty( $search['ignoreRedirect'] ) ) {
        continue;
      }

      $title = File::normalizeTitle( $dbKey );
      $redir = $this->checkRedirect( $title ); // hopefully hits memcached

      if ( $redir && $redir->getNamespace() == NS_FILE ) {
        $file = $this->newFile( $redir );
        if ( $file && $fileMatchesSearch( $file, $search ) ) {
          $file->redirectedFrom( $title->getDBkey() );
          if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
            $finalFiles[$dbKey] = [
              'title' => $file->getTitle()->getDBkey(),
              'timestamp' => $file->getTimestamp()
            ];
          } else {
            $finalFiles[$dbKey] = $file;
          }
        }
      }
    }

    return $finalFiles;
  }
  /**
   * List the image-table files whose content has the given SHA-1 hash,
   * ordered by file name. The lookup runs on the replica DB.
   *
   * @param string $hash A sha1 hash to look for
   * @return LocalFile[]
   */
  function findBySha1( $hash ) {
    $dbr = $this->getReplicaDB();
    $query = LocalFile::getQueryInfo();
    $conds = [ 'img_sha1' => $hash ];
    $options = [ 'ORDER BY' => 'img_name' ];

    $rows = $dbr->select(
      $query['tables'], $query['fields'], $conds, __METHOD__, $options, $query['joins']
    );

    $files = [];
    foreach ( $rows as $row ) {
      $files[] = $this->newFileFromRow( $row );
    }
    $rows->free();

    return $files;
  }
  /**
   * List the image-table files for several SHA-1 content hashes in a single
   * query on the replica DB, grouped by hash.
   *
   * Overrides generic implementation in FileRepo for performance reason
   *
   * @param string[] $hashes An array of hashes
   * @return array[] Map of (sha1 of the file => list of file objects); hashes
   *   without any file have no entry. Empty when $hashes is empty.
   */
  function findBySha1s( array $hashes ) {
    if ( $hashes === [] ) {
      return []; // empty parameter
    }

    $dbr = $this->getReplicaDB();
    $query = LocalFile::getQueryInfo();
    $conds = [ 'img_sha1' => $hashes ];
    $options = [ 'ORDER BY' => 'img_name' ];

    $rows = $dbr->select(
      $query['tables'], $query['fields'], $conds, __METHOD__, $options, $query['joins']
    );

    $filesByHash = [];
    foreach ( $rows as $row ) {
      $file = $this->newFileFromRow( $row );
      $filesByHash[$file->getSha1()][] = $file;
    }
    $rows->free();

    return $filesByHash;
  }
  /**
   * List image-table files whose name begins with $prefix, ordered by name.
   * The lookup runs on the replica DB.
   *
   * @param string $prefix The prefix to search for
   * @param int $limit The maximum amount of files to return
   * @return LocalFile[]
   */
  public function findFilesByPrefix( $prefix, $limit ) {
    $options = [ 'ORDER BY' => 'img_name', 'LIMIT' => intval( $limit ) ];

    $dbr = $this->getReplicaDB();
    $query = LocalFile::getQueryInfo();
    $nameCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );

    $rows = $dbr->select(
      $query['tables'], $query['fields'], $nameCond, __METHOD__, $options, $query['joins']
    );

    $matches = [];
    foreach ( $rows as $row ) {
      $matches[] = $this->newFileFromRow( $row );
    }

    return $matches;
  }
  /**
   * Get the replica database handle, obtained with wfGetDB( DB_REPLICA ).
   *
   * @return IDatabase
   */
  function getReplicaDB() {
    $replica = wfGetDB( DB_REPLICA );

    return $replica;
  }
  /**
   * Old name of getReplicaDB(); returns whatever that method returns.
   *
   * @return IDatabase
   * @deprecated Since 1.29
   */
  function getSlaveDB() {
    $replica = $this->getReplicaDB();

    return $replica;
  }
  /**
   * Get the master database handle, obtained with wfGetDB( DB_MASTER ).
   *
   * @return IDatabase
   */
  function getMasterDB() {
    $master = wfGetDB( DB_MASTER );

    return $master;
  }
  /**
   * Get a closure that maps a DB index (DB_REPLICA/DB_MASTER) to a handle
   * by calling wfGetDB() with that index.
   *
   * @return Closure
   */
  protected function getDBFactory() {
    $factory = function ( $index ) {
      return wfGetDB( $index );
    };

    return $factory;
  }
  /**
   * Get a key on the primary cache for this repository.
   *
   * The arguments are the parts of the key and are passed on unchanged to
   * wfMemcKey(). Callers in this class check the result for false (meaning
   * the repository's cache is not accessible at this site); this
   * implementation itself simply returns what wfMemcKey() returns.
   *
   * @return string|bool
   */
  function getSharedCacheKey( /*...*/ ) {
    return call_user_func_array( 'wfMemcKey', func_get_args() );
  }
  /**
   * Purge the cached image-redirect entry for a title.
   *
   * When a shared cache key is available, its deletion from the main WAN
   * cache is registered on the master DB through
   * onTransactionPreCommitOrIdle(). Without a shared key nothing is done.
   *
   * @param Title $title Title of page
   * @return void
   */
  function invalidateImageRedirect( Title $title ) {
    $key = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
    if ( !$key ) {
      return;
    }

    $purge = function () use ( $key ) {
      ObjectCache::getMainWANInstance()->delete( $key );
    };
    $this->getMasterDB()->onTransactionPreCommitOrIdle( $purge, __METHOD__ );
  }
  /**
   * Return information about the repository: the parent's info array with a
   * 'favicon' entry (the expanded $wgFavicon URL) merged in.
   *
   * @return array
   * @since 1.22
   */
  function getInfo() {
    global $wgFavicon;

    $info = parent::getInfo();
    $localExtras = [
      'favicon' => wfExpandUrl( $wgFavicon ),
    ];

    return array_merge( $info, $localExtras );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param mixed $srcPath Forwarded unchanged
   * @param mixed $dstZone Forwarded unchanged
   * @param mixed $dstRel Forwarded unchanged
   * @param int $flags Forwarded unchanged
   * @return Status
   */
  public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param array $triplets Forwarded unchanged
   * @param int $flags Forwarded unchanged
   * @return Status
   */
  public function storeBatch( array $triplets, $flags = 0 ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param array $files Forwarded unchanged
   * @param int $flags Forwarded unchanged
   * @return Status
   */
  public function cleanupBatch( array $files, $flags = 0 ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param mixed $src Forwarded unchanged
   * @param mixed $dstRel Forwarded unchanged
   * @param mixed $archiveRel Forwarded unchanged
   * @param int $flags Forwarded unchanged
   * @param array $options Forwarded unchanged
   * @return Status
   */
  public function publish(
    $src,
    $dstRel,
    $archiveRel,
    $flags = 0,
    array $options = []
  ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param array $ntuples Forwarded unchanged
   * @param int $flags Forwarded unchanged
   * @return Status
   */
  public function publishBatch( array $ntuples, $flags = 0 ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param mixed $srcRel Forwarded unchanged
   * @param mixed $archiveRel Forwarded unchanged
   * @return Status
   */
  public function delete( $srcRel, $archiveRel ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Write operation routed through skipWriteOperationIfSha1(): the arguments
   * are forwarded exactly as passed, under this method's name.
   *
   * @param array $sourceDestPairs Forwarded unchanged
   * @return Status
   */
  public function deleteBatch( array $sourceDestPairs ) {
    $passedArgs = func_get_args();

    return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
  }
  /**
   * Skips the write operation if storage is sha1-based, executes it normally otherwise
   *
   * The repository must be writable in either case; assertWritableRepo() is
   * called first. When the operation is not skipped, the parent class's
   * method of the same name is invoked with the given arguments.
   *
   * @param string $function Name of the parent method to run
   * @param array $args Arguments for that method, in call order
   * @return Status A good status when skipped, otherwise the parent method's result
   */
  protected function skipWriteOperationIfSha1( $function, array $args ) {
    $this->assertWritableRepo(); // fail out if read-only

    if ( $this->hasSha1Storage() ) {
      wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

      return Status::newGood();
    }

    // Direct parent call instead of the string callable 'parent::name',
    // which is deprecated as of PHP 8.2.
    return parent::$function( ...$args );
  }
  526. }