objset.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 2022 The FreeBSD Foundation
  5. *
  6. * This software was developed by Mark Johnston under sponsorship from
  7. * the FreeBSD Foundation.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are
  11. * met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  22. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28. * SUCH DAMAGE.
  29. */
  30. #include <assert.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <util.h>
  34. #include "zfs.h"
  35. #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t))
  36. struct objset_dnode_chunk {
  37. dnode_phys_t buf[DNODES_PER_CHUNK];
  38. unsigned int nextfree;
  39. STAILQ_ENTRY(objset_dnode_chunk) next;
  40. };
  41. typedef struct zfs_objset {
  42. /* Physical object set. */
  43. objset_phys_t *phys;
  44. off_t osloc;
  45. off_t osblksz;
  46. blkptr_t osbp; /* set in objset_write() */
  47. /* Accounting. */
  48. off_t space; /* bytes allocated to this objset */
  49. /* dnode allocator. */
  50. uint64_t dnodecount;
  51. STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
  52. } zfs_objset_t;
  53. static void
  54. dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
  55. uint16_t bonuslen)
  56. {
  57. dnode->dn_indblkshift = MAXBLOCKSHIFT;
  58. dnode->dn_type = type;
  59. dnode->dn_bonustype = bonustype;
  60. dnode->dn_bonuslen = bonuslen;
  61. dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
  62. dnode->dn_nlevels = 1;
  63. dnode->dn_nblkptr = 1;
  64. dnode->dn_flags = DNODE_FLAG_USED_BYTES;
  65. }
  66. zfs_objset_t *
  67. objset_alloc(zfs_opt_t *zfs, uint64_t type)
  68. {
  69. struct objset_dnode_chunk *chunk;
  70. zfs_objset_t *os;
  71. os = ecalloc(1, sizeof(*os));
  72. os->osblksz = sizeof(objset_phys_t);
  73. os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
  74. /*
  75. * Object ID zero is always reserved for the meta dnode, which is
  76. * embedded in the objset itself.
  77. */
  78. STAILQ_INIT(&os->dnodechunks);
  79. chunk = ecalloc(1, sizeof(*chunk));
  80. chunk->nextfree = 1;
  81. STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
  82. os->dnodecount = 1;
  83. os->phys = ecalloc(1, os->osblksz);
  84. os->phys->os_type = type;
  85. dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
  86. os->phys->os_meta_dnode.dn_datablkszsec =
  87. DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
  88. return (os);
  89. }
  90. /*
  91. * Write the dnode array and physical object set to disk.
  92. */
  93. static void
  94. _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
  95. off_t loc)
  96. {
  97. struct objset_dnode_chunk *chunk, *tmp;
  98. unsigned int total;
  99. /*
  100. * Write out the dnode array, i.e., the meta-dnode. For some reason its
  101. * data blocks must be 16KB in size no matter how large the array is.
  102. */
  103. total = 0;
  104. STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
  105. unsigned int i;
  106. assert(chunk->nextfree > 0);
  107. assert(chunk->nextfree <= os->dnodecount);
  108. assert(chunk->nextfree <= DNODES_PER_CHUNK);
  109. for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
  110. blkptr_t *bp;
  111. uint64_t fill;
  112. if (chunk->nextfree - i < DNODES_PER_BLOCK)
  113. fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
  114. else
  115. fill = 0;
  116. bp = dnode_cursor_next(zfs, c,
  117. (total + i) * sizeof(dnode_phys_t));
  118. vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
  119. 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
  120. loc += DNODE_BLOCK_SIZE;
  121. }
  122. total += i;
  123. free(chunk);
  124. }
  125. dnode_cursor_finish(zfs, c);
  126. STAILQ_INIT(&os->dnodechunks);
  127. /*
  128. * Write the object set itself. The saved block pointer will be copied
  129. * into the referencing DSL dataset or the uberblocks.
  130. */
  131. vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0,
  132. os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp);
  133. }
  134. void
  135. objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
  136. {
  137. struct dnode_cursor *c;
  138. off_t dnodeloc, dnodesz;
  139. uint64_t dnodecount;
  140. /*
  141. * There is a chicken-and-egg problem here when writing the MOS: we
  142. * cannot write space maps before we're finished allocating space from
  143. * the vdev, and we can't write the MOS without having allocated space
  144. * for indirect dnode blocks. Thus, rather than lazily allocating
  145. * indirect blocks for the meta-dnode (which would be simpler), they are
  146. * allocated up-front and before writing space maps.
  147. */
  148. dnodecount = os->dnodecount;
  149. if (os == zfs->mos)
  150. dnodecount += zfs->mscount;
  151. dnodesz = dnodecount * sizeof(dnode_phys_t);
  152. c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
  153. DNODE_BLOCK_SIZE);
  154. dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
  155. dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
  156. if (os == zfs->mos) {
  157. vdev_spacemap_write(zfs);
  158. /*
  159. * We've finished allocating space, account for it in $MOS and
  160. * in the parent directory.
  161. */
  162. dsl_dir_root_finalize(zfs, os->space);
  163. }
  164. _objset_write(zfs, os, c, dnodeloc);
  165. }
  166. dnode_phys_t *
  167. objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
  168. uint16_t bonuslen, uint64_t *idp)
  169. {
  170. struct objset_dnode_chunk *chunk;
  171. dnode_phys_t *dnode;
  172. assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
  173. assert(!STAILQ_EMPTY(&os->dnodechunks));
  174. chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
  175. if (chunk->nextfree == DNODES_PER_CHUNK) {
  176. chunk = ecalloc(1, sizeof(*chunk));
  177. STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
  178. }
  179. *idp = os->dnodecount++;
  180. dnode = &chunk->buf[chunk->nextfree++];
  181. dnode_init(dnode, type, bonustype, bonuslen);
  182. dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
  183. return (dnode);
  184. }
  185. dnode_phys_t *
  186. objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
  187. {
  188. return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
  189. }
  190. /*
  191. * Look up a physical dnode by ID. This is not used often so a linear search is
  192. * fine.
  193. */
  194. dnode_phys_t *
  195. objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
  196. {
  197. struct objset_dnode_chunk *chunk;
  198. assert(id > 0);
  199. assert(id < os->dnodecount);
  200. STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
  201. if (id < DNODES_PER_CHUNK)
  202. return (&chunk->buf[id]);
  203. id -= DNODES_PER_CHUNK;
  204. }
  205. assert(0);
  206. return (NULL);
  207. }
  208. off_t
  209. objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
  210. {
  211. off_t loc;
  212. loc = vdev_space_alloc(zfs, lenp);
  213. os->space += *lenp;
  214. return (loc);
  215. }
  216. uint64_t
  217. objset_space(const zfs_objset_t *os)
  218. {
  219. return (os->space);
  220. }
  221. void
  222. objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
  223. {
  224. memcpy(bp, &os->osbp, sizeof(blkptr_t));
  225. }