// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
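
/*
 * Free the global UUID table. Safe to call even if the table was never
 * allocated.
 */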
void
xfs_uuid_table_free(void)
{
        if (xfs_uuid_table_size == 0)
                return;
        kmem_free(xfs_uuid_table);
        xfs_uuid_table = NULL;
        xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
        struct xfs_mount *mp)
{
        uuid_t *uuid = &mp->m_sb.sb_uuid;
        int hole, i;

        /* Publish UUID in struct super_block */
        uuid_copy(&mp->m_super->s_uuid, uuid);

        if (mp->m_flags & XFS_MOUNT_NOUUID)
                return 0;

        if (uuid_is_null(uuid)) {
                xfs_warn(mp, "Filesystem has null UUID - can't mount");
                return -EINVAL;
        }

        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
                if (uuid_is_null(&xfs_uuid_table[i])) {
                        hole = i;
                        continue;
                }
                if (uuid_equal(uuid, &xfs_uuid_table[i]))
                        goto out_duplicate;
        }

        if (hole < 0) {
                xfs_uuid_table = kmem_realloc(xfs_uuid_table,
                        (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
                        KM_SLEEP);
                hole = xfs_uuid_table_size++;
        }
        xfs_uuid_table[hole] = *uuid;
        mutex_unlock(&xfs_uuid_table_mutex);

        return 0;

 out_duplicate:
        mutex_unlock(&xfs_uuid_table_mutex);
        xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
        return -EINVAL;
}
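
/*
 * Remove this filesystem's UUID from the global table at unmount time,
 * leaving a null hole that a subsequent mount can reuse.
 */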
STATIC void
xfs_uuid_unmount(
        struct xfs_mount *mp)
{
        uuid_t *uuid = &mp->m_sb.sb_uuid;
        int i;

        if (mp->m_flags & XFS_MOUNT_NOUUID)
                return;

        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0; i < xfs_uuid_table_size; i++) {
                if (uuid_is_null(&xfs_uuid_table[i]))
                        continue;
                if (!uuid_equal(uuid, &xfs_uuid_table[i]))
                        continue;
                memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
                break;
        }
        ASSERT(i < xfs_uuid_table_size);
        mutex_unlock(&xfs_uuid_table_mutex);
}
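
/*
 * RCU callback that performs the final free of a perag structure once all
 * lockless lookups have completed.
 */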
STATIC void
__xfs_free_perag(
        struct rcu_head *head)
{
        struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

        ASSERT(atomic_read(&pag->pag_ref) == 0);
        kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
STATIC void
xfs_free_perag(
        xfs_mount_t *mp)
{
        xfs_agnumber_t agno;
        struct xfs_perag *pag;

        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                spin_lock(&mp->m_perag_lock);
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
                spin_unlock(&mp->m_perag_lock);
                ASSERT(pag);
                ASSERT(atomic_read(&pag->pag_ref) == 0);
                xfs_buf_hash_destroy(pag);
                mutex_destroy(&pag->pag_ici_reclaim_lock);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
        xfs_sb_t *sbp,
        uint64_t nblocks)
{
        ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
        ASSERT(sbp->sb_blocklog >= BBSHIFT);

        /* Limited by ULONG_MAX of page cache index */
        if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
                return -EFBIG;
        return 0;
}
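
/*
 * Allocate and initialise a perag structure for each AG up to agcount,
 * skipping any that already exist (the growfs case). On success, *maxagi
 * is updated with the result of xfs_set_inode_alloc(), which limits where
 * new inodes may be allocated.
 */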
int
xfs_initialize_perag(
        xfs_mount_t *mp,
        xfs_agnumber_t agcount,
        xfs_agnumber_t *maxagi)
{
        xfs_agnumber_t index;
        xfs_agnumber_t first_initialised = NULLAGNUMBER;
        xfs_perag_t *pag;
        int error = -ENOMEM;

        /*
         * Walk the current per-ag tree so we don't try to initialise AGs
         * that already exist (growfs case). Allocate and insert all the
         * AGs we don't find ready for initialisation.
         */
        for (index = 0; index < agcount; index++) {
                pag = xfs_perag_get(mp, index);
                if (pag) {
                        xfs_perag_put(pag);
                        continue;
                }

                pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
                if (!pag)
                        goto out_unwind_new_pags;
                pag->pag_agno = index;
                pag->pag_mount = mp;
                spin_lock_init(&pag->pag_ici_lock);
                mutex_init(&pag->pag_ici_reclaim_lock);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
                if (xfs_buf_hash_init(pag))
                        goto out_free_pag;
                init_waitqueue_head(&pag->pagb_wait);
                spin_lock_init(&pag->pagb_lock);
                pag->pagb_count = 0;
                pag->pagb_tree = RB_ROOT;

                if (radix_tree_preload(GFP_NOFS))
                        goto out_hash_destroy;

                spin_lock(&mp->m_perag_lock);
                if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
                        BUG();
                        spin_unlock(&mp->m_perag_lock);
                        radix_tree_preload_end();
                        error = -EEXIST;
                        goto out_hash_destroy;
                }
                spin_unlock(&mp->m_perag_lock);
                radix_tree_preload_end();
                /* first new pag is fully initialized */
                if (first_initialised == NULLAGNUMBER)
                        first_initialised = index;
        }

        index = xfs_set_inode_alloc(mp, agcount);

        if (maxagi)
                *maxagi = index;

        mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
        return 0;

 out_hash_destroy:
        xfs_buf_hash_destroy(pag);
 out_free_pag:
        mutex_destroy(&pag->pag_ici_reclaim_lock);
        kmem_free(pag);
 out_unwind_new_pags:
        /* unwind any prior newly initialized pags */
        for (index = first_initialised; index < agcount; index++) {
                pag = radix_tree_delete(&mp->m_perag_tree, index);
                if (!pag)
                        break;
                xfs_buf_hash_destroy(pag);
                mutex_destroy(&pag->pag_ici_reclaim_lock);
                kmem_free(pag);
        }
        return error;
}

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
        struct xfs_mount *mp,
        int flags)
{
        unsigned int sector_size;
        struct xfs_buf *bp;
        struct xfs_sb *sbp = &mp->m_sb;
        int error;
        int loud = !(flags & XFS_MFSI_QUIET);
        const struct xfs_buf_ops *buf_ops;

        ASSERT(mp->m_sb_bp == NULL);
        ASSERT(mp->m_ddev_targp != NULL);

        /*
         * For the initial read, we must guess at the sector
         * size based on the block device. It's enough to
         * get the sb_sectsize out of the superblock and
         * then reread with the proper length.
         * We don't verify it yet, because it may not be complete.
         */
        sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
        buf_ops = NULL;

        /*
         * Allocate a (locked) buffer to hold the superblock. This will be kept
         * around at all times to optimize access to the superblock. Therefore,
         * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
         * elevated.
         */
reread:
        error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
                                      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
                                      buf_ops);
        if (error) {
                if (loud)
                        xfs_warn(mp, "SB validate failed with error %d.", error);
                /* bad CRC means corrupted metadata */
                if (error == -EFSBADCRC)
                        error = -EFSCORRUPTED;
                return error;
        }

        /*
         * Initialize the mount structure from the superblock.
         */
        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));

        /*
         * If we haven't validated the superblock, do so now before we try
         * to check the sector size and reread the superblock appropriately.
         */
        if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                if (loud)
                        xfs_warn(mp, "Invalid superblock magic number");
                error = -EINVAL;
                goto release_buf;
        }

        /*
         * We must be able to do sector-sized and sector-aligned IO.
         */
        if (sector_size > sbp->sb_sectsize) {
                if (loud)
                        xfs_warn(mp, "device supports %u byte sectors (not %u)",
                                 sector_size, sbp->sb_sectsize);
                error = -ENOSYS;
                goto release_buf;
        }

        if (buf_ops == NULL) {
                /*
                 * Re-read the superblock so the buffer is correctly sized,
                 * and properly verified.
                 */
                xfs_buf_relse(bp);
                sector_size = sbp->sb_sectsize;
                buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
                goto reread;
        }

        xfs_reinit_percpu_counters(mp);

        /* no need to be quiet anymore, so reset the buf ops */
        bp->b_ops = &xfs_sb_buf_ops;

        mp->m_sb_bp = bp;
        xfs_buf_unlock(bp);
        return 0;

release_buf:
        xfs_buf_relse(bp);
        return error;
}

/*
 * Update alignment values based on mount options and sb values
 */
STATIC int
xfs_update_alignment(xfs_mount_t *mp)
{
        xfs_sb_t *sbp = &(mp->m_sb);

        if (mp->m_dalign) {
                /*
                 * If stripe unit and stripe width are not multiples
                 * of the fs blocksize, the alignment is invalid and
                 * the mount fails.
                 */
                if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
                    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
                        xfs_warn(mp,
                "alignment check failed: sunit/swidth vs. blocksize(%d)",
                                sbp->sb_blocksize);
                        return -EINVAL;
                } else {
                        /*
                         * Convert the stripe unit and width to FSBs.
                         */
                        mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
                        if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
                                xfs_warn(mp,
                "alignment check failed: sunit/swidth vs. agsize(%d)",
                                        sbp->sb_agblocks);
                                return -EINVAL;
                        } else if (mp->m_dalign) {
                                mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
                        } else {
                                xfs_warn(mp,
                "alignment check failed: sunit(%d) less than bsize(%d)",
                                        mp->m_dalign, sbp->sb_blocksize);
                                return -EINVAL;
                        }
                }

                /*
                 * Update superblock with new values
                 * and log changes
                 */
                if (xfs_sb_version_hasdalign(sbp)) {
                        if (sbp->sb_unit != mp->m_dalign) {
                                sbp->sb_unit = mp->m_dalign;
                                mp->m_update_sb = true;
                        }
                        if (sbp->sb_width != mp->m_swidth) {
                                sbp->sb_width = mp->m_swidth;
                                mp->m_update_sb = true;
                        }
                } else {
                        xfs_warn(mp,
        "cannot change alignment: superblock does not support data alignment");
                        return -EINVAL;
                }
        } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
                   xfs_sb_version_hasdalign(&mp->m_sb)) {
                mp->m_dalign = sbp->sb_unit;
                mp->m_swidth = sbp->sb_width;
        }

        return 0;
}

/*
 * Set the maximum inode count for this filesystem
 */
STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
        xfs_sb_t *sbp = &(mp->m_sb);
        uint64_t icount;

        if (sbp->sb_imax_pct) {
                /*
                 * Make sure the maximum inode count is a multiple
                 * of the units we allocate inodes in.
                 */
                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
                do_div(icount, 100);
                do_div(icount, mp->m_ialloc_blks);
                mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
                                  sbp->sb_inopblog;
        } else {
                mp->m_maxicount = 0;
        }
}

/*
 * Set the default minimum read and write sizes unless already specified
 * in a mount option. We use smaller I/O sizes when the file system is
 * being used for NFS service (wsync mount option).
 */
STATIC void
xfs_set_rw_sizes(xfs_mount_t *mp)
{
        xfs_sb_t *sbp = &(mp->m_sb);
        int readio_log, writeio_log;

        if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
                if (mp->m_flags & XFS_MOUNT_WSYNC) {
                        readio_log = XFS_WSYNC_READIO_LOG;
                        writeio_log = XFS_WSYNC_WRITEIO_LOG;
                } else {
                        readio_log = XFS_READIO_LOG_LARGE;
                        writeio_log = XFS_WRITEIO_LOG_LARGE;
                }
        } else {
                readio_log = mp->m_readio_log;
                writeio_log = mp->m_writeio_log;
        }

        if (sbp->sb_blocklog > readio_log) {
                mp->m_readio_log = sbp->sb_blocklog;
        } else {
                mp->m_readio_log = readio_log;
        }
        mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
        if (sbp->sb_blocklog > writeio_log) {
                mp->m_writeio_log = sbp->sb_blocklog;
        } else {
                mp->m_writeio_log = writeio_log;
        }
        mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
}

/*
 * Precalculate the low space thresholds for dynamic speculative
 * preallocation.
 */
void
xfs_set_low_space_thresholds(
        struct xfs_mount *mp)
{
        int i;

        for (i = 0; i < XFS_LOWSP_MAX; i++) {
                uint64_t space = mp->m_sb.sb_dblocks;

                do_div(space, 100);
                mp->m_low_space[i] = space * (i + 1);
        }
}

/*
 * Set whether we're using inode alignment.
 */
STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
        if (xfs_sb_version_hasalign(&mp->m_sb) &&
            mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
                mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
        else
                mp->m_inoalign_mask = 0;
        /*
         * If we are using stripe alignment, check whether
         * the stripe unit is a multiple of the inode alignment
         */
        if (mp->m_dalign && mp->m_inoalign_mask &&
            !(mp->m_dalign & mp->m_inoalign_mask))
                mp->m_sinoalign = mp->m_dalign;
        else
                mp->m_sinoalign = 0;
}

/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
        struct xfs_mount *mp)
{
        struct xfs_buf *bp;
        xfs_daddr_t d;
        int error;

        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
                xfs_warn(mp, "filesystem size mismatch detected");
                return -EFBIG;
        }
        error = xfs_buf_read_uncached(mp->m_ddev_targp,
                                      d - XFS_FSS_TO_BB(mp, 1),
                                      XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
        if (error) {
                xfs_warn(mp, "last sector read failed");
                return error;
        }
        xfs_buf_relse(bp);

        if (mp->m_logdev_targp == mp->m_ddev_targp)
                return 0;

        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
                xfs_warn(mp, "log size mismatch detected");
                return -EFBIG;
        }
        error = xfs_buf_read_uncached(mp->m_logdev_targp,
                                      d - XFS_FSB_TO_BB(mp, 1),
                                      XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
        if (error) {
                xfs_warn(mp, "log device read failed");
                return error;
        }
        xfs_buf_relse(bp);
        return 0;
}

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
        struct xfs_mount *mp)
{
        mp->m_qflags = 0;

        /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
        if (mp->m_sb.sb_qflags == 0)
                return 0;
        spin_lock(&mp->m_sb_lock);
        mp->m_sb.sb_qflags = 0;
        spin_unlock(&mp->m_sb_lock);

        if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
                return 0;

        return xfs_sync_sb(mp, false);
}
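
/* Compute the default number of blocks to set aside for the reserve pool. */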
uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
        uint64_t resblks;

        /*
         * We default to 5% or 8192 fsbs of space reserved, whichever is
         * smaller. This is intended to cover concurrent allocation
         * transactions when we initially hit ENOSPC. These each require a 4
         * block reservation. Hence by default we cover roughly 2000 concurrent
         * allocation reservations.
         */
        resblks = mp->m_sb.sb_dblocks;
        do_div(resblks, 20);
        resblks = min_t(uint64_t, resblks, 8192);
        return resblks;
}

/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
        struct xfs_mount *mp)
{
        /*
         * The AG0 superblock verifier rejects in-progress filesystems,
         * so we should never see the flag set this far into mounting.
         */
        if (mp->m_sb.sb_inprogress) {
                xfs_err(mp, "sb_inprogress set after log recovery??");
                WARN_ON(1);
                return -EFSCORRUPTED;
        }

        /*
         * Now the log is mounted, we know if it was an unclean shutdown or
         * not. If it was unclean, the first phase of recovery has already
         * completed, so we have consistent AG blocks on disk. We have not
         * recovered EFIs yet, but they are recovered transactionally in the
         * second recovery phase later.
         *
         * If the log was clean when we mounted, we can check the summary
         * counters. If any of them are obviously incorrect, we can recompute
         * them from the AGF headers in the next step.
         */
        if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
            (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
             !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
             mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
                mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;

        /*
         * We can safely re-initialise incore superblock counters from the
         * per-ag data. These may not be correct if the filesystem was not
         * cleanly unmounted, so we waited for recovery to finish before doing
         * this.
         *
         * If the filesystem was cleanly unmounted or the previous check did
         * not flag anything weird, then we can trust the values in the
         * superblock to be correct and we don't need to do anything here.
         * Otherwise, recalculate the summary counters.
         */
        if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
             XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
            !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
                return 0;

        return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * This function does the following on an initial mount of a file system:
 *      - reads the superblock from disk and init the mount struct
 *      - if we're a 32-bit kernel, do a size check on the superblock
 *        so we don't mount terabyte filesystems
 *      - init mount struct realtime fields
 *      - allocate inode hash table for fs
 *      - init directory manager
 *      - perform recovery and init the log manager
 */
int
xfs_mountfs(
        struct xfs_mount *mp)
{
        struct xfs_sb *sbp = &(mp->m_sb);
        struct xfs_inode *rip;
        uint64_t resblks;
        uint quotamount = 0;
        uint quotaflags = 0;
        int error = 0;

        xfs_sb_mount_common(mp, sbp);

        /*
         * Check for mismatched features2 values. Older kernels read & wrote
         * into the wrong sb offset for sb_features2 on some platforms due to
         * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
         * which made older superblock reading/writing routines swap it as a
         * 64-bit value.
         *
         * For backwards compatibility, we make both slots equal.
         *
         * If we detect a mismatched field, we OR the set bits into the existing
         * features2 field in case it has already been modified; we don't want
         * to lose any features. We then update the bad location with the ORed
         * value so that older kernels will see any features2 flags. The
         * superblock writeback code ensures the new sb_features2 is copied to
         * sb_bad_features2 before it is logged or written to disk.
         */
        if (xfs_sb_has_mismatched_features2(sbp)) {
                xfs_warn(mp, "correcting sb_features alignment problem");
                sbp->sb_features2 |= sbp->sb_bad_features2;
                mp->m_update_sb = true;

                /*
                 * Re-check for ATTR2 in case it was found in bad_features2
                 * slot.
                 */
                if (xfs_sb_version_hasattr2(&mp->m_sb) &&
                    !(mp->m_flags & XFS_MOUNT_NOATTR2))
                        mp->m_flags |= XFS_MOUNT_ATTR2;
        }

        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
            (mp->m_flags & XFS_MOUNT_NOATTR2)) {
                xfs_sb_version_removeattr2(&mp->m_sb);
                mp->m_update_sb = true;

                /* update sb_versionnum for the clearing of the morebits */
                if (!sbp->sb_features2)
                        mp->m_update_sb = true;
        }

        /* always use v2 inodes by default now */
        if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
                mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
                mp->m_update_sb = true;
        }

        /*
         * Check if sb_agblocks is aligned at stripe boundary.
         * If sb_agblocks is NOT aligned turn off m_dalign since
         * allocator alignment is within an ag, therefore ag has
         * to be aligned at stripe boundary.
         */
        error = xfs_update_alignment(mp);
        if (error)
                goto out;

        xfs_alloc_compute_maxlevels(mp);
        xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
        xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
        xfs_ialloc_compute_maxlevels(mp);
        xfs_rmapbt_compute_maxlevels(mp);
        xfs_refcountbt_compute_maxlevels(mp);

        xfs_set_maxicount(mp);

        /* enable fail_at_unmount as default */
        mp->m_fail_unmount = true;

        error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
        if (error)
                goto out;

        error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
                               &mp->m_kobj, "stats");
        if (error)
                goto out_remove_sysfs;

        error = xfs_error_sysfs_init(mp);
        if (error)
                goto out_del_stats;

        error = xfs_errortag_init(mp);
        if (error)
                goto out_remove_error_sysfs;

        error = xfs_uuid_mount(mp);
        if (error)
                goto out_remove_errortag;

        /*
         * Set the minimum read and write sizes
         */
        xfs_set_rw_sizes(mp);

        /* set the low space thresholds for dynamic preallocation */
        xfs_set_low_space_thresholds(mp);

        /*
         * Set the inode cluster size.
         * This may still be overridden by the file system
         * block size if it is larger than the chosen cluster size.
         *
         * For v5 filesystems, scale the cluster size with the inode size to
         * keep a constant ratio of inode per cluster buffer, but only if mkfs
         * has set the inode alignment value appropriately for larger cluster
         * sizes.
         */
        mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                int new_size = mp->m_inode_cluster_size;

                new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
                if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
                        mp->m_inode_cluster_size = new_size;
        }

        /*
         * If enabled, sparse inode chunk alignment is expected to match the
         * cluster size. Full inode chunk alignment must match the chunk size,
         * but that is checked on sb read verification...
         */
        if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
            mp->m_sb.sb_spino_align !=
                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
                xfs_warn(mp,
        "Sparse inode block alignment (%u) must match cluster size (%llu).",
                         mp->m_sb.sb_spino_align,
                         XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
                error = -EINVAL;
                goto out_remove_uuid;
        }

        /*
         * Set inode alignment fields
         */
        xfs_set_inoalignment(mp);

        /*
         * Check that the data (and log if separate) is an ok size.
         */
        error = xfs_check_sizes(mp);
        if (error)
                goto out_remove_uuid;

        /*
         * Initialize realtime fields in the mount structure
         */
        error = xfs_rtmount_init(mp);
        if (error) {
                xfs_warn(mp, "RT mount failed");
                goto out_remove_uuid;
        }

        /*
         * Copies the low order bits of the timestamp and the randomly
         * set "sequence" number out of a UUID.
         */
        mp->m_fixedfsid[0] =
                (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
                 get_unaligned_be16(&sbp->sb_uuid.b[4]);
        mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);

        error = xfs_da_mount(mp);
        if (error) {
                xfs_warn(mp, "Failed dir/attr init: %d", error);
                goto out_remove_uuid;
        }

        /*
         * Initialize the precomputed transaction reservations values.
         */
        xfs_trans_init(mp);

        /*
         * Allocate and initialize the per-ag data.
         */
        error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
        if (error) {
                xfs_warn(mp, "Failed per-ag init: %d", error);
                goto out_free_dir;
        }

        if (!sbp->sb_logblocks) {
                xfs_warn(mp, "no log defined");
                XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
                error = -EFSCORRUPTED;
                goto out_free_perag;
        }

        /*
         * Log's mount-time initialization. The first part of recovery can place
         * some items on the AIL, to be handled when recovery is finished or
         * cancelled.
         */
        error = xfs_log_mount(mp, mp->m_logdev_targp,
                              XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
                              XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
        if (error) {
                xfs_warn(mp, "log mount failed");
                goto out_fail_wait;
        }

        /* Make sure the summary counts are ok. */
        error = xfs_check_summary_counts(mp);
        if (error)
                goto out_log_dealloc;

        /*
         * Get and sanity-check the root inode.
         * Save the pointer to it in the mount structure.
         */
        error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
                         XFS_ILOCK_EXCL, &rip);
        if (error) {
                xfs_warn(mp,
                        "Failed to read root inode 0x%llx, error %d",
                        sbp->sb_rootino, -error);
                goto out_log_dealloc;
        }

        ASSERT(rip != NULL);

        if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
                xfs_warn(mp, "corrupted root inode %llu: not a directory",
                         (unsigned long long)rip->i_ino);
                xfs_iunlock(rip, XFS_ILOCK_EXCL);
                XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, mp);
                error = -EFSCORRUPTED;
                goto out_rele_rip;
        }
        mp->m_rootip = rip;     /* save it */

        xfs_iunlock(rip, XFS_ILOCK_EXCL);

        /*
         * Initialize realtime inode pointers in the mount structure
         */
        error = xfs_rtmount_inodes(mp);
        if (error) {
                /*
                 * Free up the root inode.
                 */
                xfs_warn(mp, "failed to read RT inodes");
                goto out_rele_rip;
        }

        /*
         * If this is a read-only mount defer the superblock updates until
         * the next remount into writeable mode. Otherwise we would never
         * perform the update e.g. for the root filesystem.
         */
        if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
                error = xfs_sync_sb(mp, false);
                if (error) {
                        xfs_warn(mp, "failed to write sb changes");
                        goto out_rtunmount;
                }
        }

        /*
         * Initialise the XFS quota management subsystem for this mount
         */
        if (XFS_IS_QUOTA_RUNNING(mp)) {
                error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
                if (error)
                        goto out_rtunmount;
        } else {
                ASSERT(!XFS_IS_QUOTA_ON(mp));

                /*
                 * If a file system had quotas running earlier, but decided to
                 * mount without -o uquota/pquota/gquota options, revoke the
                 * quotachecked license.
                 */
                if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
                        xfs_notice(mp, "resetting quota flags");
                        error = xfs_mount_reset_sbqflags(mp);
                        if (error)
                                goto out_rtunmount;
                }
        }

        /*
         * Finish recovering the file system. This part needed to be delayed
         * until after the root and real-time bitmap inodes were consistently
         * read in.
         */
        error = xfs_log_mount_finish(mp);
        if (error) {
                xfs_warn(mp, "log mount finish failed");
                goto out_rtunmount;
        }

        /*
         * Now the log is fully replayed, we can transition to full read-only
         * mode for read-only mounts. This will sync all the metadata and clean
         * the log so that the recovery we just performed does not have to be
         * replayed again on the next mount.
         *
         * We use the same quiesce mechanism as the rw->ro remount, as they are
         * semantically identical operations.
         */
        if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
                                                        XFS_MOUNT_RDONLY) {
                xfs_quiesce_attr(mp);
        }

        /*
         * Complete the quota initialisation, post-log-replay component.
         */
        if (quotamount) {
                ASSERT(mp->m_qflags == 0);
                mp->m_qflags = quotaflags;

                xfs_qm_mount_quotas(mp);
        }

        /*
         * Now we are mounted, reserve a small amount of unused space for
         * privileged transactions. This is needed so that transaction
         * space required for critical operations can dip into this pool
         * when at ENOSPC. This is needed for operations like create with
         * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
         * are not allowed to use this reserved space.
         *
         * This may drive us straight to ENOSPC on mount, but that implies
         * we were already there on the last unmount. Warn if this occurs.
         */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
                resblks = xfs_default_resblks(mp);
                error = xfs_reserve_blocks(mp, &resblks, NULL);
                if (error)
                        xfs_warn(mp,
        "Unable to allocate reserve blocks. Continuing without reserve pool.");

                /* Recover any CoW blocks that never got remapped. */
                error = xfs_reflink_recover_cow(mp);
                if (error) {
                        xfs_err(mp,
        "Error %d recovering leftover CoW allocations.", error);
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        goto out_quota;
                }

                /* Reserve AG blocks for future btree expansion. */
                error = xfs_fs_reserve_ag_blocks(mp);
                if (error && error != -ENOSPC)
                        goto out_agresv;
        }

        return 0;

 out_agresv:
        xfs_fs_unreserve_ag_blocks(mp);
 out_quota:
        xfs_qm_unmount_quotas(mp);
 out_rtunmount:
        xfs_rtunmount_inodes(mp);
 out_rele_rip:
        xfs_irele(rip);
        /* Clean out dquots that might be in memory after quotacheck. */
        xfs_qm_unmount(mp);
        /*
         * Cancel all delayed reclaim work and reclaim the inodes directly.
         * We have to do this /after/ rtunmount and qm_unmount because those
         * two will have scheduled delayed reclaim for the rt/quota inodes.
         *
         * This is slightly different from the unmountfs call sequence
         * because we could be tearing down a partially set up mount. In
         * particular, if log_mount_finish fails we bail out without calling
         * qm_unmount_quotas and therefore rely on qm_unmount to release the
         * quota inodes.
         */
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp, SYNC_WAIT);
 out_log_dealloc:
        mp->m_flags |= XFS_MOUNT_UNMOUNTING;
        xfs_log_mount_cancel(mp);
 out_fail_wait:
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
                xfs_wait_buftarg(mp->m_logdev_targp);
        xfs_wait_buftarg(mp->m_ddev_targp);
 out_free_perag:
        xfs_free_perag(mp);
 out_free_dir:
        xfs_da_unmount(mp);
 out_remove_uuid:
        xfs_uuid_unmount(mp);
 out_remove_errortag:
        xfs_errortag_del(mp);
 out_remove_error_sysfs:
        xfs_error_sysfs_del(mp);
 out_del_stats:
        xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
        xfs_sysfs_del(&mp->m_kobj);
 out:
        return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
        struct xfs_mount *mp)
{
        uint64_t resblks;
        int error;

        xfs_icache_disable_reclaim(mp);
        xfs_fs_unreserve_ag_blocks(mp);
        xfs_qm_unmount_quotas(mp);
        xfs_rtunmount_inodes(mp);
        xfs_irele(mp->m_rootip);

        /*
         * We can potentially deadlock here if we have an inode cluster
         * that has been freed but whose buffer is still pinned in memory
         * because the transaction is still sitting in an iclog. The stale
         * inodes on that buffer will have their flush locks held until the
         * transaction hits the disk and the callbacks run. The inode
         * flush takes the flush lock unconditionally, and with nothing to
         * push out the iclog we will never get that unlocked. Hence we
         * need to force the log first.
         */
        xfs_log_force(mp, XFS_LOG_SYNC);

        /*
         * Wait for all busy extents to be freed, including completion of
         * any discard operation.
         */
        xfs_extent_busy_wait_all(mp);
        flush_workqueue(xfs_discard_wq);

        /*
         * We now need to tell the world we are unmounting. This will allow
         * us to detect that the filesystem is going away and we should error
         * out anything that we have been retrying in the background. This will
         * prevent never-ending retries in AIL pushing from hanging the unmount.
         */
        mp->m_flags |= XFS_MOUNT_UNMOUNTING;

        /*
         * Flush all pending changes from the AIL.
         */
        xfs_ail_push_all_sync(mp->m_ail);

        /*
         * And reclaim all inodes. At this point there should be no dirty
         * inodes and none should be pinned or locked, but use synchronous
         * reclaim just to be sure. We can stop background inode reclaim
         * here as well if it is still running.
         */
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp, SYNC_WAIT);

        xfs_qm_unmount(mp);

        /*
         * Unreserve any blocks we have so that when we unmount we don't account
         * the reserved free space as used. This is really only necessary for
         * lazy superblock counting because it trusts the incore superblock
         * counters to be absolutely correct on clean unmount.
         *
         * We don't bother correcting this elsewhere for lazy superblock
         * counting because on mount of an unclean filesystem we reconstruct the
         * correct counter value and this is irrelevant.
         *
         * For non-lazy counter filesystems, this doesn't matter at all because
         * we only ever apply deltas to the superblock and hence the incore
         * value does not matter....
         */
        resblks = 0;
        error = xfs_reserve_blocks(mp, &resblks, NULL);
        if (error)
                xfs_warn(mp, "Unable to free reserved block pool. "
                             "Freespace may not be correct on next mount.");

        error = xfs_log_sbcount(mp);
        if (error)
                xfs_warn(mp, "Unable to update superblock counters. "
                             "Freespace may not be correct on next mount.");

        xfs_log_unmount(mp);
        xfs_da_unmount(mp);
        xfs_uuid_unmount(mp);

#if defined(DEBUG)
        xfs_errortag_clearall(mp);
#endif
        xfs_free_perag(mp);

        xfs_errortag_del(mp);
        xfs_error_sysfs_del(mp);
        xfs_sysfs_del(&mp->m_stats.xs_kobj);
        xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed. The caller specifies the
 * minimum freeze level for which modifications should not be allowed. This
 * allows certain operations to proceed while the freeze sequence is in
 * progress, if necessary.
 */
bool
xfs_fs_writable(
        struct xfs_mount *mp,
        int level)
{
        ASSERT(level > SB_UNFROZEN);
        if ((mp->m_super->s_writers.frozen >= level) ||
            XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
                return false;

        return true;
}

/*
 * xfs_log_sbcount
 *
 * Sync the superblock counters to disk.
 *
 * Note this code can be called during the process of freezing, so we use the
 * transaction allocator that does not block when the transaction subsystem is
 * in its frozen state.
 */
int
xfs_log_sbcount(xfs_mount_t *mp)
{
        /* allow this to proceed during the freeze sequence... */
        if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                return 0;

        /*
         * We don't need to do this if we are updating the superblock
         * counters on every modification.
         */
        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                return 0;

        return xfs_sync_sb(mp, true);
}

/*
 * Deltas for the inode count are +/-64, hence we use a large batch size
 * of 128 so we don't need to take the counter lock on every update.
 */
#define XFS_ICOUNT_BATCH        128

int
xfs_mod_icount(
        struct xfs_mount *mp,
        int64_t delta)
{
        percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
        if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
        }
        return 0;
}
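
/*
 * Adjust the incore free inode count. The count should never go negative;
 * if it would, back the change out and return -EINVAL.
 */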
int
xfs_mod_ifree(
        struct xfs_mount *mp,
        int64_t delta)
{
        percpu_counter_add(&mp->m_ifree, delta);
        if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_ifree, -delta);
                return -EINVAL;
        }
        return 0;
}

/*
 * Deltas for the block count can vary from 1 to very large, but lock
 * contention only occurs on frequent small block count updates such as in the
 * delayed allocation path for buffered writes (page-at-a-time updates). Hence
 * we set a large batch count (1024) to minimise global counter updates except
 * when we get near to ENOSPC and we have to be very accurate with our
 * updates.
 */
#define XFS_FDBLOCKS_BATCH      1024
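
/*
 * Adjust the count of free data blocks by @delta. If @rsvd is true, a
 * caller that would otherwise hit ENOSPC may dip into the reserved block
 * pool once the main pool is exhausted.
 */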
int
xfs_mod_fdblocks(
        struct xfs_mount *mp,
        int64_t delta,
        bool rsvd)
{
        int64_t lcounter;
        long long res_used;
        s32 batch;

        if (delta > 0) {
                /*
                 * If the reserve pool is depleted, put blocks back into it
                 * first. Most of the time the pool is full.
                 */
                if (likely(mp->m_resblks == mp->m_resblks_avail)) {
                        percpu_counter_add(&mp->m_fdblocks, delta);
                        return 0;
                }

                spin_lock(&mp->m_sb_lock);
                res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

                if (res_used > delta) {
                        mp->m_resblks_avail += delta;
                } else {
                        delta -= res_used;
                        mp->m_resblks_avail = mp->m_resblks;
                        percpu_counter_add(&mp->m_fdblocks, delta);
                }
                spin_unlock(&mp->m_sb_lock);
                return 0;
        }

        /*
         * Taking blocks away, so we need to be more accurate the closer we
         * are to zero.
         *
         * If the counter has a value of less than 2 * max batch size,
         * then make everything serialise as we are really close to
         * ENOSPC.
         */
        if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
                                     XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
                batch = XFS_FDBLOCKS_BATCH;

        percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
        if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
                                     XFS_FDBLOCKS_BATCH) >= 0) {
                /* we had space! */
                return 0;
        }

        /*
         * Lock up the sb for dipping into reserves before releasing the space
         * that took us to ENOSPC.
         */
        spin_lock(&mp->m_sb_lock);
        percpu_counter_add(&mp->m_fdblocks, -delta);
        if (!rsvd)
                goto fdblocks_enospc;

        lcounter = (long long)mp->m_resblks_avail + delta;
        if (lcounter >= 0) {
                mp->m_resblks_avail = lcounter;
                spin_unlock(&mp->m_sb_lock);
                return 0;
        }
        printk_once(KERN_WARNING
                "Filesystem \"%s\": reserve blocks depleted! "
                "Consider increasing reserve pool size.",
                mp->m_fsname);

fdblocks_enospc:
        spin_unlock(&mp->m_sb_lock);
        return -ENOSPC;
}
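
/*
 * Adjust the count of free realtime extents. Unlike the other summary
 * counters, this one is not a percpu counter; it is updated directly in
 * the incore superblock under m_sb_lock.
 */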
int
xfs_mod_frextents(
        struct xfs_mount *mp,
        int64_t delta)
{
        int64_t lcounter;
        int ret = 0;

        spin_lock(&mp->m_sb_lock);
        lcounter = mp->m_sb.sb_frextents + delta;
        if (lcounter < 0)
                ret = -ENOSPC;
        else
                mp->m_sb.sb_frextents = lcounter;
        spin_unlock(&mp->m_sb_lock);
        return ret;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_buf_relse().
 *
 * If the flags parameter is XBF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
struct xfs_buf *
xfs_getsb(
        struct xfs_mount *mp,
        int flags)
{
        struct xfs_buf *bp = mp->m_sb_bp;

        if (!xfs_buf_trylock(bp)) {
                if (flags & XBF_TRYLOCK)
                        return NULL;
                xfs_buf_lock(bp);
        }

        xfs_buf_hold(bp);
        ASSERT(bp->b_flags & XBF_DONE);
        return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
        struct xfs_mount *mp)
{
        struct xfs_buf *bp = mp->m_sb_bp;

        xfs_buf_lock(bp);
        mp->m_sb_bp = NULL;
        xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
        struct xfs_mount *mp,
        char *message)
{
        if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
            xfs_readonly_buftarg(mp->m_logdev_targp) ||
            (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
                xfs_notice(mp, "%s required on read-only device.", message);
                xfs_notice(mp, "write access unavailable, cannot proceed.");
                return -EROFS;
        }
        return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
        struct xfs_mount *mp)
{
        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                return;

        spin_lock(&mp->m_sb_lock);
        mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
        spin_unlock(&mp->m_sb_lock);
}