/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2010 David Chinner.
 * Copyright (c) 2011 Christoph Hellwig.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_log.h"

void
xfs_extent_busy_insert(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned int		flags)
{
	struct xfs_extent_busy	*new;
	struct xfs_extent_busy	*busyp;
	struct xfs_perag	*pag;
	struct rb_node		**rbp;
	struct rb_node		*parent = NULL;

	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
	new->agno = agno;
	new->bno = bno;
	new->length = len;
	INIT_LIST_HEAD(&new->list);
	new->flags = flags;

	/* trace before insert to be able to see failed inserts */
	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);

	pag = xfs_perag_get(tp->t_mountp, new->agno);
	spin_lock(&pag->pagb_lock);
	rbp = &pag->pagb_tree.rb_node;
	while (*rbp) {
		parent = *rbp;
		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);

		if (new->bno < busyp->bno) {
			rbp = &(*rbp)->rb_left;
			ASSERT(new->bno + new->length <= busyp->bno);
		} else if (new->bno > busyp->bno) {
			rbp = &(*rbp)->rb_right;
			ASSERT(bno >= busyp->bno + busyp->length);
		} else {
			ASSERT(0);
		}
	}

	rb_link_node(&new->rb_node, parent, rbp);
	rb_insert_color(&new->rb_node, &pag->pagb_tree);

	list_add(&new->list, &tp->t_busy);
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}
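
/*
 * Usage sketch (illustrative, not part of the upstream file): the code
 * that frees an extent inserts it here so that it cannot be reallocated
 * before the freeing transaction reaches stable storage, e.g.:
 *
 *	xfs_extent_busy_insert(tp, agno, bno, len, 0);
 *
 * Passing XFS_EXTENT_BUSY_SKIP_DISCARD in flags would exempt the extent
 * from online discard when the busy list is cleared.
 */
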
/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  You need to be holding the busy extent tree lock when calling
 * xfs_extent_busy_search().  This function returns 0 for no overlapping busy
 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
 * match.  This is done so that a non-zero return indicates an overlap that
 * will require a synchronous transaction, but it can still be
 * used to distinguish between a partial and an exact match.
 */
int
xfs_extent_busy_search(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;
	struct xfs_extent_busy	*busyp;
	int			match = 0;

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);

	rbp = pag->pagb_tree.rb_node;

	/* find closest start bno overlap */
	while (rbp) {
		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
		if (bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			if (bno + len > busyp->bno)
				match = -1;
			rbp = rbp->rb_left;
		} else if (bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			if (bno < busyp->bno + busyp->length)
				match = -1;
			rbp = rbp->rb_right;
		} else {
			/* bno matches busyp, length determines exact match */
			match = (busyp->length == len) ? 1 : -1;
			break;
		}
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
	return match;
}
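
/*
 * Usage sketch (hypothetical caller, not upstream code): any non-zero
 * return means the range overlaps blocks whose free has not committed,
 * so reuse requires synchronizing with the log first:
 *
 *	match = xfs_extent_busy_search(mp, agno, bno, len);
 *	if (match)
 *		xfs_log_force(mp, XFS_LOG_SYNC);
 *
 * A caller can further distinguish match == 1 (the range is exactly one
 * busy extent) from match == -1 (a partial overlap).
 */
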
/*
 * The found free extent [fbno, fend] overlaps part or all of the given busy
 * extent.  If the overlap covers the beginning, the end, or all of the busy
 * extent, the overlapping portion can be made unbusy and used for the
 * allocation.  We can't split a busy extent because we can't modify a
 * transaction/CIL context busy list, but we can update an entry's block
 * number or length.
 *
 * Returns true if the extent can safely be reused, or false if the search
 * needs to be restarted.
 */
STATIC bool
xfs_extent_busy_update_extent(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata) __releases(&pag->pagb_lock)
					  __acquires(&pag->pagb_lock)
{
	xfs_agblock_t		fend = fbno + flen;
	xfs_agblock_t		bbno = busyp->bno;
	xfs_agblock_t		bend = bbno + busyp->length;

	/*
	 * This extent is currently being discarded.  Give the thread
	 * performing the discard a chance to mark the extent unbusy
	 * and retry.
	 */
	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
		spin_unlock(&pag->pagb_lock);
		delay(1);
		spin_lock(&pag->pagb_lock);
		return false;
	}

	/*
	 * If there is a busy extent overlapping a user allocation, we have
	 * no choice but to force the log and retry the search.
	 *
	 * Fortunately this does not happen during normal operation, but
	 * only if the filesystem is very low on space and has to dip into
	 * the AGFL for normal allocations.
	 */
	if (userdata)
		goto out_force_log;

	if (bbno < fbno && bend > fend) {
		/*
		 * Case 1:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +---------+
		 *        fbno   fend
		 */

		/*
		 * We would have to split the busy extent to be able to track
		 * it correct, which we cannot do because we would have to
		 * modify the list of busy extents attached to the transaction
		 * or CIL context, which is immutable.
		 *
		 * Force out the log to clear the busy extent and retry the
		 * search.
		 */
		goto out_force_log;
	} else if (bbno >= fbno && bend <= fend) {
		/*
		 * Case 2:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +-----------------+
		 *    fbno           fend
		 *
		 * Case 3:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 4:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 5:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +-----------------------------------+
		 *    fbno                             fend
		 *
		 */

		/*
		 * The busy extent is fully covered by the extent we are
		 * allocating, and can simply be removed from the rbtree.
		 * However we cannot remove it from the immutable list
		 * tracking busy extents in the transaction or CIL context,
		 * so set the length to zero to mark it invalid.
		 *
		 * We also need to restart the busy extent search from the
		 * tree root, because erasing the node can rearrange the
		 * tree topology.
		 */
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
		busyp->length = 0;
		return false;
	} else if (fend < bend) {
		/*
		 * Case 6:
		 *              bbno           bend
		 *              +BBBBBBBBBBBBBBBBB+
		 *    +---------+
		 *    fbno   fend
		 *
		 * Case 7:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +------------------+
		 *    fbno            fend
		 *
		 */
		busyp->bno = fend;
	} else if (bbno < fbno) {
		/*
		 * Case 8:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +-------------+
		 *        fbno       fend
		 *
		 * Case 9:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +----------------------+
		 *        fbno                fend
		 */
		busyp->length = fbno - busyp->bno;
	} else {
		ASSERT(0);
	}

	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
	return true;

out_force_log:
	spin_unlock(&pag->pagb_lock);
	xfs_log_force(mp, XFS_LOG_SYNC);
	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
	spin_lock(&pag->pagb_lock);
	return false;
}
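
/*
 * Calling convention sketch (illustrative; xfs_extent_busy_reuse() below
 * is the real loop): because xfs_extent_busy_update_extent() may drop
 * and retake pagb_lock, a false return means the rbtree may have been
 * rearranged and the walk must restart from the root:
 *
 *	restart:
 *		rbp = pag->pagb_tree.rb_node;
 *		while (rbp) {
 *			...
 *			if (!xfs_extent_busy_update_extent(mp, pag, busyp,
 *							   fbno, flen, false))
 *				goto restart;
 *		}
 */
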
/*
 * For a given extent [fbno, flen], make sure we can reuse it safely.
 */
void
xfs_extent_busy_reuse(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;

	ASSERT(flen > 0);

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);
restart:
	rbp = pag->pagb_tree.rb_node;
	while (rbp) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fbno + flen <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
						   userdata))
			goto restart;
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}
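
/*
 * Illustrative call shape (hedged, not upstream code): the allocator
 * uses this before handing out blocks that may still sit in the busy
 * tree, e.g. blocks taken from the AGFL:
 *
 *	xfs_extent_busy_reuse(mp, agno, fbno, flen, false);
 *
 * On return no busy extent overlaps [fbno, fbno + flen) any more, so the
 * range can be reused immediately.
 */
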
/*
 * For a given extent [fbno, flen], search the busy extent list to find a
 * subset of the extent that is not busy.  If *rlen is smaller than
 * args->minlen no suitable extent could be found, and the higher level
 * code needs to force out the log and retry the allocation.
 */
void
xfs_extent_busy_trim(
	struct xfs_alloc_arg	*args,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	xfs_agblock_t		*rbno,
	xfs_extlen_t		*rlen)
{
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	struct rb_node		*rbp;

	ASSERT(len > 0);

	spin_lock(&args->pag->pagb_lock);
restart:
	fbno = bno;
	flen = len;
	rbp = args->pag->pagb_tree.rb_node;
	while (rbp && flen >= args->minlen) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	fend = fbno + flen;
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fend <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		/*
		 * If this is a metadata allocation, try to reuse the busy
		 * extent instead of trimming the allocation.
		 */
		if (!xfs_alloc_is_userdata(args->datatype) &&
		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
							   busyp, fbno, flen,
							   false))
				goto restart;
			continue;
		}

		if (bbno <= fbno) {
			/* start overlap */

			/*
			 * Case 1:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +---------+
			 *        fbno   fend
			 *
			 * Case 2:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-------------+
			 *    fbno       fend
			 *
			 * Case 3:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +-------------+
			 *        fbno       fend
			 *
			 * Case 4:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-----------------+
			 *    fbno           fend
			 *
			 * No unbusy region in extent, return failure.
			 */
			if (fend <= bend)
				goto fail;

			/*
			 * Case 5:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +----------------------+
			 *        fbno                fend
			 *
			 * Case 6:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *                       +-------+
			 *                       fbno fend
			 */
			fbno = bend;
		} else if (bend >= fend) {
			/* end overlap */

			/*
			 * Case 7:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +------------------+
			 *    fbno            fend
			 *
			 * Case 8:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *    +-------+
			 *    fbno fend
			 */
			fend = bbno;
		} else {
			/* middle overlap */

			/*
			 * Case 9:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +-----------------------------------+
			 *    fbno                             fend
			 *
			 * Can be trimmed to:
			 *    +-------+        OR         +-------+
			 *    fbno fend                   fbno fend
			 *
			 * Backward allocation leads to significant
			 * fragmentation of directories, which degrades
			 * directory performance, therefore we always want to
			 * choose the option that produces forward allocation
			 * patterns.
			 * Preferring the lower bno extent will make the next
			 * request use "fend" as the start of the next
			 * allocation;  if the segment is no longer busy at
			 * that point, we'll get a contiguous allocation, but
			 * even if it is still busy, we will get a forward
			 * allocation.
			 * We try to avoid choosing the segment at "bend",
			 * because that can lead to the next allocation
			 * taking the segment at "fbno", which would be a
			 * backward allocation.  We only use the segment at
			 * "fbno" if it is much larger than the current
			 * requested size, because in that case there's a
			 * good chance subsequent allocations will be
			 * contiguous.
			 */
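			/*
			 * Worked example (illustrative numbers, not from the
			 * original source): with fbno = 0, fend = 100 and a
			 * busy extent at bbno = 10, bend = 20, the left
			 * segment holds 10 blocks and the right segment 80.
			 * With args->maxlen = 8 the left segment alone
			 * satisfies maxlen, so we trim to [0, 10).  With
			 * maxlen = 15 the left segment is too small, but
			 * 80 >= 4 * 15, so we take the right segment
			 * [20, 100).  With maxlen = 25 and minlen = 12
			 * neither test passes (80 < 100 and 10 < 12) and we
			 * fail.
			 */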
			if (bbno - fbno >= args->maxlen) {
				/* left candidate fits perfect */
				fend = bbno;
			} else if (fend - bend >= args->maxlen * 4) {
				/* right candidate has enough free space */
				fbno = bend;
			} else if (bbno - fbno >= args->minlen) {
				/* left candidate fits minimum requirement */
				fend = bbno;
			} else {
				goto fail;
			}
		}

		flen = fend - fbno;
	}
	spin_unlock(&args->pag->pagb_lock);

	if (fbno != bno || flen != len) {
		trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
					   fbno, flen);
	}
	*rbno = fbno;
	*rlen = flen;
	return;
fail:
	/*
	 * Return a zero extent length as failure indication.  All callers
	 * re-check if the trimmed extent satisfies the minlen requirement.
	 */
	spin_unlock(&args->pag->pagb_lock);
	trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
	*rbno = fbno;
	*rlen = 0;
}
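
/*
 * Usage sketch (hypothetical caller, not upstream code): the result must
 * be re-checked against minlen, since failure is reported as a
 * zero-length extent rather than an error code:
 *
 *	xfs_extent_busy_trim(args, bno, len, &tbno, &tlen);
 *	if (tlen < args->minlen) {
 *		xfs_log_force(args->mp, XFS_LOG_SYNC);
 *		(retry the allocation)
 *	}
 *
 * tbno and tlen are invented local names for the trimmed result.
 */
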
STATIC void
xfs_extent_busy_clear_one(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp)
{
	if (busyp->length) {
		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
						busyp->length);
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
	}

	list_del_init(&busyp->list);
	kmem_free(busyp);
}

/*
 * Remove all extents on the passed in list from the busy extents tree.
 * If do_discard is set skip extents that need to be discarded, and mark
 * these as undergoing a discard operation instead.
 */
void
xfs_extent_busy_clear(
	struct xfs_mount	*mp,
	struct list_head	*list,
	bool			do_discard)
{
	struct xfs_extent_busy	*busyp, *n;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno = NULLAGNUMBER;

	list_for_each_entry_safe(busyp, n, list, list) {
		if (busyp->agno != agno) {
			if (pag) {
				spin_unlock(&pag->pagb_lock);
				xfs_perag_put(pag);
			}
			pag = xfs_perag_get(mp, busyp->agno);
			spin_lock(&pag->pagb_lock);
			agno = busyp->agno;
		}

		if (do_discard && busyp->length &&
		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
		else
			xfs_extent_busy_clear_one(mp, pag, busyp);
	}

	if (pag) {
		spin_unlock(&pag->pagb_lock);
		xfs_perag_put(pag);
	}
}
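
/*
 * Illustrative call shape (hedged): in the kernel this runs from log I/O
 * completion, once the transactions that freed these extents are stable
 * on disk.  Discard-eligible extents are handed to the online discard
 * machinery first when the filesystem is mounted with -o discard:
 *
 *	xfs_extent_busy_clear(mp, &ctx->busy_extents,
 *			      mp->m_flags & XFS_MOUNT_DISCARD);
 */
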
/*
 * Callback for list_sort to sort busy extents by the AG they reside in.
 */
int
xfs_extent_busy_ag_cmp(
	void			*priv,
	struct list_head	*a,
	struct list_head	*b)
{
	return container_of(a, struct xfs_extent_busy, list)->agno -
		container_of(b, struct xfs_extent_busy, list)->agno;
}
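
/*
 * Usage sketch: sorting a busy extent list by AG before processing it,
 * so that per-AG locks are always taken in ascending AG order.
 * list_sort() is the standard kernel helper matching this callback
 * signature:
 *
 *	list_sort(NULL, &busy_list, xfs_extent_busy_ag_cmp);
 */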