FSRepo.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. <?php
  /**
   * A repository for files accessible via the local filesystem. Does not support
   * database access or registration.
   *
   * Files live under a public root directory; temporary files go to a "temp"
   * subdirectory of that root, and deleted files go to a separately configured
   * directory (if any).
   *
   * @ingroup FileRepo
   */
  class FSRepo extends FileRepo {
      /**
       * $directory         Public root directory of the repository.
       * $deletedDir        Root directory of the deleted zone, or false if none is configured.
       * $url               Public root URL of the repository.
       * $deletedHashLevels Number of leading key characters used as directory levels
       *                    by getDeletedHashPath().
       */
      var $directory, $deletedDir, $url, $deletedHashLevels;

      /**
       * (class, method) callback pair. Not referenced inside this class;
       * presumably consumed by FileRepo to build file objects -- verify there.
       */
      var $fileFactory = array( 'UnregisteredLocalFile', 'newFromTitle' );

      /** Not referenced inside this class; presumably consumed by FileRepo -- verify there. */
      var $oldFileFactory = false;

      /** Selects the error-parameter cleanup callback, see getErrorCleanupFunction(). */
      var $pathDisclosureProtection = 'simple';

      /**
       * Search => replacement map built lazily by simpleClean(). Declared here so
       * that it is not created as a dynamic property on first use.
       */
      var $simpleCleanPairs;

      /**
       * @param array $info Repository settings. 'directory' and 'url' are required;
       *   'hashLevels' (default 2), 'deletedHashLevels' (default: same as
       *   hashLevels) and 'deletedDir' (default false) are optional. The whole
       *   array is also passed to the parent constructor.
       */
      function __construct( $info ) {
          parent::__construct( $info );

          // Required settings
          $this->directory = $info['directory'];
          $this->url = $info['url'];

          // Optional settings
          $this->hashLevels = isset( $info['hashLevels'] ) ? $info['hashLevels'] : 2;
          $this->deletedHashLevels = isset( $info['deletedHashLevels'] ) ?
              $info['deletedHashLevels'] : $this->hashLevels;
          $this->deletedDir = isset( $info['deletedDir'] ) ? $info['deletedDir'] : false;
      }

      /**
       * Get the public root directory of the repository.
       */
      function getRootDirectory() {
          return $this->directory;
      }

      /**
       * Get the public root URL of the repository
       */
      function getRootUrl() {
          return $this->url;
      }

      /**
       * Returns true if the repository uses a multi-level directory structure,
       * i.e. if hashLevels is non-zero.
       */
      function isHashed() {
          return (bool)$this->hashLevels;
      }

      /**
       * Get the local directory corresponding to one of the three basic zones
       *
       * @param string $zone 'public', 'temp' or 'deleted'
       * @return mixed Directory path; false for an unknown zone, or for the
       *   deleted zone when no deleted directory is configured.
       */
      function getZonePath( $zone ) {
          switch ( $zone ) {
              case 'public':
                  return $this->directory;
              case 'temp':
                  return "{$this->directory}/temp";
              case 'deleted':
                  return $this->deletedDir;
              default:
                  return false;
          }
      }

      /**
       * Get the URL corresponding to one of the three basic zones
       *
       * @param string $zone 'public', 'temp' or 'deleted'
       * @return mixed URL, or false if the zone has no public URL or is unknown
       */
      function getZoneUrl( $zone ) {
          switch ( $zone ) {
              case 'public':
                  return $this->url;
              case 'temp':
                  return "{$this->url}/temp";
              case 'deleted':
                  return false; // no public URL
              default:
                  return false;
          }
      }

      /**
       * Get a URL referring to this repository, with the private mwrepo protocol.
       * The suffix, if supplied, is considered to be unencoded, and will be
       * URL-encoded before being returned.
       *
       * @param mixed $suffix Unencoded suffix, or false for the bare repository URL
       * @return string
       */
      function getVirtualUrl( $suffix = false ) {
          $path = 'mwrepo://' . $this->name;
          if ( $suffix !== false ) {
              $path .= '/' . rawurlencode( $suffix );
          }
          return $path;
      }

      /**
       * Get the local path corresponding to a virtual URL
       *
       * The URL must have the form mwrepo://<repo name>/<zone>/<relative path>,
       * where the relative path is URL-encoded.
       *
       * @param string $url
       * @return string Local filesystem path
       * @throws MWException if the URL does not use the mwrepo protocol, is
       *   malformed, names another repository, or names a zone without a path
       */
      function resolveVirtualUrl( $url ) {
          if ( substr( $url, 0, 9 ) !== 'mwrepo://' ) {
              throw new MWException( __METHOD__.': unknown protocol' );
          }

          // Split into at most three parts so that slashes in the relative path survive
          $bits = explode( '/', substr( $url, 9 ), 3 );
          if ( count( $bits ) != 3 ) {
              throw new MWException( __METHOD__.": invalid mwrepo URL: $url" );
          }
          list( $repo, $zone, $rel ) = $bits;
          if ( $repo !== $this->name ) {
              throw new MWException( __METHOD__.": fetching from a foreign repo is not supported" );
          }
          $base = $this->getZonePath( $zone );
          if ( !$base ) {
              throw new MWException( __METHOD__.": invalid zone: $zone" );
          }
          return $base . '/' . rawurldecode( $rel );
      }

      /**
       * Store a batch of files
       *
       * Works in two passes. The first pass validates every triplet, creates the
       * destination directories and collects fatal errors; nothing is copied or
       * moved unless that pass leaves the status OK. The second pass performs
       * the copies/renames and counts successes and failures.
       *
       * @param array $triplets (src,zone,dest) triplets as per store()
       * @param integer $flags Bitwise combination of the following flags:
       *     self::DELETE_SOURCE     Delete the source file after upload
       *     self::OVERWRITE         Overwrite an existing destination file instead of failing
       *     self::OVERWRITE_SAME    Overwrite the file if the destination exists and has the
       *                             same contents as the source
       * @return mixed Status object created by newGood()/newFatal()
       * @throws MWException on an invalid zone or destination name
       */
      function storeBatch( $triplets, $flags = 0 ) {
          if ( !wfMkdirParents( $this->directory ) ) {
              return $this->newFatal( 'upload_directory_missing', $this->directory );
          }
          if ( !is_writable( $this->directory ) ) {
              return $this->newFatal( 'upload_directory_read_only', $this->directory );
          }

          // Pass 1: validate everything before touching any file
          $status = $this->newGood();
          foreach ( $triplets as $i => $triplet ) {
              list( $srcPath, $dstZone, $dstRel ) = $triplet;

              $root = $this->getZonePath( $dstZone );
              if ( !$root ) {
                  throw new MWException( "Invalid zone: $dstZone" );
              }
              if ( !$this->validateFilename( $dstRel ) ) {
                  // Single-quoted: the message names the variable literally, the
                  // value is not interpolated.
                  throw new MWException( 'Validation error in $dstRel' );
              }
              $dstPath = "$root/$dstRel";
              $dstDir = dirname( $dstPath );

              if ( !is_dir( $dstDir ) ) {
                  if ( !wfMkdirParents( $dstDir ) ) {
                      return $this->newFatal( 'directorycreateerror', $dstDir );
                  }
                  if ( $dstZone == 'deleted' ) {
                      $this->initDeletedDir( $dstDir );
                  }
              }

              if ( self::isVirtualUrl( $srcPath ) ) {
                  // Write the resolved path back so that the second pass sees it too
                  $srcPath = $triplets[$i][0] = $this->resolveVirtualUrl( $srcPath );
              }
              if ( !is_file( $srcPath ) ) {
                  // Make a list of files that don't exist for return to the caller
                  $status->fatal( 'filenotfound', $srcPath );
                  continue;
              }
              if ( !( $flags & self::OVERWRITE ) && file_exists( $dstPath ) ) {
                  if ( $flags & self::OVERWRITE_SAME ) {
                      $hashSource = sha1_file( $srcPath );
                      $hashDest = sha1_file( $dstPath );
                      // Strict comparison: with a loose one, two different hex
                      // digests that both look like "0e<digits>" would compare
                      // equal as numeric strings.
                      if ( $hashSource !== $hashDest ) {
                          $status->fatal( 'fileexistserror', $dstPath );
                      }
                  } else {
                      $status->fatal( 'fileexistserror', $dstPath );
                  }
              }
          }

          // On Windows an existing destination is removed before rename()
          $deleteDest = wfIsWindows() && ( $flags & self::OVERWRITE );

          // Abort now on failure
          if ( !$status->ok ) {
              return $status;
          }

          // Pass 2: move or copy the files
          foreach ( $triplets as $triplet ) {
              list( $srcPath, $dstZone, $dstRel ) = $triplet;
              $root = $this->getZonePath( $dstZone );
              $dstPath = "$root/$dstRel";
              $good = true;

              if ( $flags & self::DELETE_SOURCE ) {
                  // Only unlink a destination that is really there, otherwise
                  // unlink() raises a warning for every new file.
                  if ( $deleteDest && file_exists( $dstPath ) ) {
                      unlink( $dstPath );
                  }
                  if ( !rename( $srcPath, $dstPath ) ) {
                      $status->error( 'filerenameerror', $srcPath, $dstPath );
                      $good = false;
                  }
              } else {
                  if ( !copy( $srcPath, $dstPath ) ) {
                      $status->error( 'filecopyerror', $srcPath, $dstPath );
                      $good = false;
                  }
              }

              if ( $good ) {
                  chmod( $dstPath, 0644 );
                  $status->successCount++;
              } else {
                  $status->failCount++;
              }
          }
          return $status;
      }

      /**
       * Take all available measures to prevent web accessibility of new deleted
       * directories, in case the user has not configured offline storage
       *
       * Both writes are best-effort: their results are not checked.
       *
       * @param string $dir Newly created directory inside the deleted zone
       */
      protected function initDeletedDir( $dir ) {
          // Add a .htaccess file to the root of the deleted zone
          $root = $this->getZonePath( 'deleted' );
          if ( !file_exists( "$root/.htaccess" ) ) {
              file_put_contents( "$root/.htaccess", "Deny from all\n" );
          }
          // Seed new directories with a blank index.html, to prevent crawling
          file_put_contents( "$dir/index.html", '' );
      }

      /**
       * Pick a random name in the temp zone and store a file to it.
       *
       * The destination name is the hash path of the original name, followed by
       * a GMT timestamp, "!" and the original name. The status value is set to
       * the virtual URL whether or not store() succeeded.
       *
       * @param string $originalName The base name of the file as specified
       *     by the user. The file extension will be maintained.
       * @param string $srcPath The current location of the file.
       * @return FileRepoStatus object with the URL in the value.
       */
      function storeTemp( $originalName, $srcPath ) {
          $date = gmdate( "YmdHis" );
          $hashPath = $this->getHashPath( $originalName );
          $dstRel = "$hashPath$date!$originalName";
          $dstUrlRel = $hashPath . $date . '!' . rawurlencode( $originalName );

          $result = $this->store( $srcPath, 'temp', $dstRel );
          $result->value = $this->getVirtualUrl( 'temp' ) . '/' . $dstUrlRel;
          return $result;
      }

      /**
       * Remove a temporary file or mark it for garbage collection
       *
       * @param string $virtualUrl The virtual URL returned by storeTemp
       * @return boolean True on success, false on failure
       * @throws MWException from resolveVirtualUrl() if the URL passes the prefix
       *     check but is otherwise malformed
       */
      function freeTemp( $virtualUrl ) {
          $temp = "mwrepo://{$this->name}/temp";
          if ( substr( $virtualUrl, 0, strlen( $temp ) ) != $temp ) {
              wfDebug( __METHOD__.": Invalid virtual URL\n" );
              return false;
          }
          $path = $this->resolveVirtualUrl( $virtualUrl );

          // A missing file is reported through the return value, not a warning
          wfSuppressWarnings();
          $success = unlink( $path );
          wfRestoreWarnings();
          return $success;
      }

      /**
       * Publish a batch of files
       *
       * Works in two passes like storeBatch(): validation and directory creation
       * first, then archiving of existing destinations and the actual
       * copy/rename. On return, the status value maps each processed triplet
       * index to 'archived' or 'new'.
       *
       * @param array $triplets (source,dest,archive) triplets as per publish()
       * @param integer $flags Bitfield, may be FileRepo::DELETE_SOURCE to indicate
       *     that the source files should be deleted if possible
       * @return mixed Status object created by newGood()/newFatal()
       * @throws MWException on an invalid destination or archive name
       */
      function publishBatch( $triplets, $flags = 0 ) {
          // Perform initial checks
          if ( !wfMkdirParents( $this->directory ) ) {
              return $this->newFatal( 'upload_directory_missing', $this->directory );
          }
          if ( !is_writable( $this->directory ) ) {
              return $this->newFatal( 'upload_directory_read_only', $this->directory );
          }

          $status = $this->newGood( array() );
          foreach ( $triplets as $i => $triplet ) {
              list( $srcPath, $dstRel, $archiveRel ) = $triplet;

              if ( substr( $srcPath, 0, 9 ) == 'mwrepo://' ) {
                  // Write the resolved path back so that the second pass sees it too
                  $triplets[$i][0] = $srcPath = $this->resolveVirtualUrl( $srcPath );
              }
              if ( !$this->validateFilename( $dstRel ) ) {
                  throw new MWException( 'Validation error in $dstRel' );
              }
              if ( !$this->validateFilename( $archiveRel ) ) {
                  throw new MWException( 'Validation error in $archiveRel' );
              }
              $dstPath = "{$this->directory}/$dstRel";
              $archivePath = "{$this->directory}/$archiveRel";

              $dstDir = dirname( $dstPath );
              $archiveDir = dirname( $archivePath );
              // Abort immediately on directory creation errors since they're likely to be repetitive
              if ( !is_dir( $dstDir ) && !wfMkdirParents( $dstDir ) ) {
                  return $this->newFatal( 'directorycreateerror', $dstDir );
              }
              if ( !is_dir( $archiveDir ) && !wfMkdirParents( $archiveDir ) ) {
                  return $this->newFatal( 'directorycreateerror', $archiveDir );
              }
              if ( !is_file( $srcPath ) ) {
                  // Make a list of files that don't exist for return to the caller
                  $status->fatal( 'filenotfound', $srcPath );
              }
          }

          if ( !$status->ok ) {
              return $status;
          }

          foreach ( $triplets as $i => $triplet ) {
              list( $srcPath, $dstRel, $archiveRel ) = $triplet;
              $dstPath = "{$this->directory}/$dstRel";
              $archivePath = "{$this->directory}/$archiveRel";

              // Archive destination file if it exists
              if( is_file( $dstPath ) ) {
                  // Check if the archive file exists
                  // This is a sanity check to avoid data loss. In UNIX, the rename primitive
                  // unlinks the destination file if it exists. DB-based synchronisation in
                  // publishBatch's caller should prevent races. In Windows there's no
                  // problem because the rename primitive fails if the destination exists.
                  if ( is_file( $archivePath ) ) {
                      $success = false;
                  } else {
                      wfSuppressWarnings();
                      $success = rename( $dstPath, $archivePath );
                      wfRestoreWarnings();
                  }

                  if( !$success ) {
                      $status->error( 'filerenameerror',$dstPath, $archivePath );
                      $status->failCount++;
                      continue;
                  } else {
                      wfDebug(__METHOD__.": moved file $dstPath to $archivePath\n");
                  }
                  $status->value[$i] = 'archived';
              } else {
                  $status->value[$i] = 'new';
              }

              $good = true;
              wfSuppressWarnings();
              if ( $flags & self::DELETE_SOURCE ) {
                  if ( !rename( $srcPath, $dstPath ) ) {
                      $status->error( 'filerenameerror', $srcPath, $dstPath );
                      $good = false;
                  }
              } else {
                  if ( !copy( $srcPath, $dstPath ) ) {
                      $status->error( 'filecopyerror', $srcPath, $dstPath );
                      $good = false;
                  }
              }
              wfRestoreWarnings();

              if ( $good ) {
                  $status->successCount++;
                  wfDebug(__METHOD__.": wrote tempfile $srcPath to $dstPath\n");
                  // Thread-safe override for umask
                  chmod( $dstPath, 0644 );
              } else {
                  $status->failCount++;
              }
          }
          return $status;
      }

      /**
       * Move a group of files to the deletion archive.
       * If no valid deletion archive is configured, this may either delete the
       * file or throw an exception, depending on the preference of the repository.
       * This implementation throws.
       *
       * @param array $sourceDestPairs Array of source/destination pairs. Each element
       *     is a two-element array containing the source file path relative to the
       *     public root in the first element, and the archive file path relative
       *     to the deleted zone root in the second element.
       * @return FileRepoStatus
       * @throws MWException if no deleted directory is configured or a name
       *     fails validation
       */
      function deleteBatch( $sourceDestPairs ) {
          $status = $this->newGood();
          if ( !$this->deletedDir ) {
              throw new MWException( __METHOD__.': no valid deletion archive directory' );
          }

          /**
           * Validate filenames and create archive directories
           */
          foreach ( $sourceDestPairs as $pair ) {
              list( $srcRel, $archiveRel ) = $pair;
              if ( !$this->validateFilename( $srcRel ) ) {
                  throw new MWException( __METHOD__.':Validation error in $srcRel' );
              }
              if ( !$this->validateFilename( $archiveRel ) ) {
                  throw new MWException( __METHOD__.':Validation error in $archiveRel' );
              }
              $archivePath = "{$this->deletedDir}/$archiveRel";
              $archiveDir = dirname( $archivePath );
              if ( !is_dir( $archiveDir ) ) {
                  if ( !wfMkdirParents( $archiveDir ) ) {
                      $status->fatal( 'directorycreateerror', $archiveDir );
                      continue;
                  }
                  $this->initDeletedDir( $archiveDir );
              }
              // Check if the archive directory is writable
              // This doesn't appear to work on NTFS
              if ( !is_writable( $archiveDir ) ) {
                  $status->fatal( 'filedelete-archive-read-only', $archiveDir );
              }
          }
          if ( !$status->ok ) {
              // Abort early
              return $status;
          }

          /**
           * Move the files
           * We're now committed to returning an OK result, which will lead to
           * the files being moved in the DB also.
           */
          foreach ( $sourceDestPairs as $pair ) {
              list( $srcRel, $archiveRel ) = $pair;
              $srcPath = "{$this->directory}/$srcRel";
              $archivePath = "{$this->deletedDir}/$archiveRel";

              // Failures are reported through the status object, so PHP warnings
              // are suppressed the same way as elsewhere in this class.
              if ( file_exists( $archivePath ) ) {
                  # A file with this content hash is already archived
                  wfSuppressWarnings();
                  $good = unlink( $srcPath );
                  wfRestoreWarnings();
                  if ( !$good ) {
                      $status->error( 'filedeleteerror', $srcPath );
                  }
              } else {
                  wfSuppressWarnings();
                  $good = rename( $srcPath, $archivePath );
                  if ( $good ) {
                      chmod( $archivePath, 0644 );
                  }
                  wfRestoreWarnings();
                  if ( !$good ) {
                      $status->error( 'filerenameerror', $srcPath, $archivePath );
                  }
              }

              if ( $good ) {
                  $status->successCount++;
              } else {
                  $status->failCount++;
              }
          }
          return $status;
      }

      /**
       * Get a relative path for a deletion archive key,
       * e.g. s/z/a/ for sza251lrxrc1jad41h5mgilp8nysje52.jpg
       *
       * @param string $key Archive key; it is indexed character by character, so
       *     it is expected to have at least deletedHashLevels characters
       * @return string Relative directory path with a trailing slash, or '' when
       *     deletedHashLevels is 0
       */
      function getDeletedHashPath( $key ) {
          $path = '';
          for ( $i = 0; $i < $this->deletedHashLevels; $i++ ) {
              $path .= $key[$i] . '/';
          }
          return $path;
      }

      /**
       * Call a callback function for every file in the repository.
       * Uses the filesystem even in child classes.
       *
       * Every possible hash directory below the public root is visited; the
       * callback receives the full path of each directory entry, exactly as
       * readdir() reports it (which includes the "." and ".." entries).
       *
       * @param callback $callback Called with one argument, the entry path
       */
      function enumFilesInFS( $callback ) {
          // One hex digit per hash level, hence 16^hashLevels leaf directories
          $numDirs = 1 << ( $this->hashLevels * 4 );
          for ( $flatIndex = 0; $flatIndex < $numDirs; $flatIndex++ ) {
              $hexString = sprintf( "%0{$this->hashLevels}x", $flatIndex );

              // Build e.g. <root>/a/ab from the hex string "ab"
              $path = $this->directory;
              for ( $hexPos = 0; $hexPos < $this->hashLevels; $hexPos++ ) {
                  $path .= '/' . substr( $hexString, 0, $hexPos + 1 );
              }
              if ( !file_exists( $path ) || !is_dir( $path ) ) {
                  continue;
              }

              $dir = opendir( $path );
              if ( !$dir ) {
                  // Unreadable directory: skip it instead of passing false to readdir()
                  continue;
              }
              while ( false !== ( $name = readdir( $dir ) ) ) {
                  call_user_func( $callback, $path . '/' . $name );
              }
              // Release the handle; up to 16^hashLevels directories are opened here
              closedir( $dir );
          }
      }

      /**
       * Call a callback function for every file in the repository
       * May use either the database or the filesystem
       *
       * This implementation always uses the filesystem.
       *
       * @param callback $callback See enumFilesInFS()
       */
      function enumFiles( $callback ) {
          $this->enumFilesInFS( $callback );
      }

      /**
       * Get properties of a file with a given virtual URL
       * The virtual URL must refer to this repo
       *
       * @param string $virtualUrl
       * @return mixed Whatever File::getPropsFromPath() returns for the resolved path
       * @throws MWException from resolveVirtualUrl() on an invalid URL
       */
      function getFileProps( $virtualUrl ) {
          $path = $this->resolveVirtualUrl( $virtualUrl );
          return File::getPropsFromPath( $path );
      }

      /**
       * Path disclosure protection functions
       *
       * Get a callback function to use for cleaning error message parameters
       *
       * @return callback simpleClean() of this object when
       *     pathDisclosureProtection is 'simple', otherwise the parent's callback
       */
      function getErrorCleanupFunction() {
          switch ( $this->pathDisclosureProtection ) {
              case 'simple':
                  $callback = array( $this, 'simpleClean' );
                  break;
              default:
                  $callback = parent::getErrorCleanupFunction();
          }
          return $callback;
      }

      /**
       * Replace well-known local directory prefixes in an error message
       * parameter with symbolic names, so that real paths are not disclosed.
       *
       * @param string $param
       * @return string Parameter with the known paths replaced
       */
      function simpleClean( $param ) {
          if ( !isset( $this->simpleCleanPairs ) ) {
              global $IP;
              // Built once per object, on first use
              $this->simpleCleanPairs = array(
                  $this->directory => 'public',
                  "{$this->directory}/temp" => 'temp',
                  $IP => '$IP',
                  // NOTE(review): maps this file's own directory to the WebStore
                  // extension path; looks like a leftover -- confirm before changing.
                  dirname( __FILE__ ) => '$IP/extensions/WebStore',
              );
              if ( $this->deletedDir ) {
                  $this->simpleCleanPairs[$this->deletedDir] = 'deleted';
              }
          }
          return strtr( $param, $this->simpleCleanPairs );
      }
  }