12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083 |
- /*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2022 The FreeBSD Foundation
- *
- * This software was developed by Mark Johnston under sponsorship from
- * the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include <sys/stat.h>
- #include <assert.h>
- #include <dirent.h>
- #include <fcntl.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include <util.h>
- #include "makefs.h"
- #include "zfs.h"
- typedef struct {
- const char *name;
- unsigned int id;
- uint16_t size;
- sa_bswap_type_t bs;
- } zfs_sattr_t;
- typedef struct zfs_fs {
- zfs_objset_t *os;
- /* Offset table for system attributes, indexed by a zpl_attr_t. */
- uint16_t *saoffs;
- size_t sacnt;
- const zfs_sattr_t *satab;
- } zfs_fs_t;
/*
 * ZPL system attribute numbers.  The ordering itself carries no meaning; it
 * simply mirrors the one hard-coded by OpenZFS, as seen in a zdb dump of the
 * SA_REGISTRY table.
 */
typedef enum zpl_attr {
	ZPL_ATIME = 0,
	ZPL_MTIME,
	ZPL_CTIME,
	ZPL_CRTIME,
	ZPL_GEN,
	ZPL_MODE,
	ZPL_SIZE,
	ZPL_PARENT,
	ZPL_LINKS,
	ZPL_XATTR,
	ZPL_RDEV,
	ZPL_FLAGS,
	ZPL_UID,
	ZPL_GID,
	ZPL_PAD,
	ZPL_ZNODE_ACL,
	ZPL_DACL_COUNT,
	ZPL_SYMLINK,
	ZPL_SCANSTAMP,
	ZPL_DACL_ACES,
	ZPL_DXATTR,
	ZPL_PROJID,
} zpl_attr_t;
- /*
- * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t.
- */
- static const zfs_sattr_t zpl_attrs[] = {
- #define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b }
- _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY),
- _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY),
- _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY),
- _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL),
- _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY),
- _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY),
- #undef ZPL_ATTR
- };
- /*
- * This layout matches that of a filesystem created using OpenZFS on FreeBSD.
- * It need not match in general, but FreeBSD's loader doesn't bother parsing the
- * layout and just hard-codes attribute offsets.
- */
- static const sa_attr_type_t zpl_attr_layout[] = {
- ZPL_MODE,
- ZPL_SIZE,
- ZPL_GEN,
- ZPL_UID,
- ZPL_GID,
- ZPL_PARENT,
- ZPL_FLAGS,
- ZPL_ATIME,
- ZPL_MTIME,
- ZPL_CTIME,
- ZPL_CRTIME,
- ZPL_LINKS,
- ZPL_DACL_COUNT,
- ZPL_DACL_ACES,
- ZPL_SYMLINK,
- };
/*
 * Keys under which the ZPL attribute layouts are stored in the SA layout ZAP.
 * Indices 0 and 1 are reserved for the legacy attribute encoding, so numbering
 * starts at 2.
 */
#define	SA_LAYOUT_INDEX_DEFAULT	2	/* everything except symlinks */
#define	SA_LAYOUT_INDEX_SYMLINK	3	/* symlinks */
- struct fs_populate_dir {
- SLIST_ENTRY(fs_populate_dir) next;
- int dirfd;
- uint64_t objid;
- zfs_zap_t *zap;
- };
- struct fs_populate_arg {
- zfs_opt_t *zfs;
- zfs_fs_t *fs; /* owning filesystem */
- uint64_t rootdirid; /* root directory dnode ID */
- int rootdirfd; /* root directory fd */
- SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */
- };
- static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int);
/*
 * close(2) wrapper that terminates the program if the descriptor cannot be
 * closed.
 */
static void
eclose(int fd)
{
	int error;

	error = close(fd);
	if (error == 0)
		return;
	err(1, "close");
}
- static bool
- fsnode_isroot(const fsnode *cur)
- {
- return (strcmp(cur->name, ".") == 0);
- }
- /*
- * Visit each node in a directory hierarchy, in pre-order depth-first order.
- */
- static void
- fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg)
- {
- assert(root->type == S_IFDIR);
- for (fsnode *cur = root; cur != NULL; cur = cur->next) {
- assert(cur->type == S_IFREG || cur->type == S_IFDIR ||
- cur->type == S_IFLNK);
- if (cb(cur, arg) == 0)
- continue;
- if (cur->type == S_IFDIR && cur->child != NULL)
- fsnode_foreach(cur->child, cb, arg);
- }
- }
- static void
- fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid)
- {
- struct fs_populate_dir *dir;
- uint64_t type;
- switch (cur->type) {
- case S_IFREG:
- type = DT_REG;
- break;
- case S_IFDIR:
- type = DT_DIR;
- break;
- case S_IFLNK:
- type = DT_LNK;
- break;
- default:
- assert(0);
- }
- dir = SLIST_FIRST(&arg->dirs);
- zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid));
- }
- static void
- fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind,
- size_t *szp)
- {
- assert(ind < fs->sacnt);
- assert(fs->saoffs[ind] != 0xffff);
- memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size);
- *szp += fs->satab[ind].size;
- }
- static void
- fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val,
- size_t valsz, size_t varoff, uint16_t ind, size_t *szp)
- {
- assert(ind < fs->sacnt);
- assert(fs->saoffs[ind] != 0xffff);
- assert(fs->satab[ind].size == 0);
- memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz);
- *szp += valsz;
- }
- /*
- * Derive the relative fd/path combo needed to access a file. Ideally we'd
- * always be able to use relative lookups (i.e., use the *at() system calls),
- * since they require less path translation and are more amenable to sandboxing,
- * but the handling of multiple staging directories makes that difficult. To
- * make matters worse, we have no choice but to use relative lookups when
- * dealing with an mtree manifest, so both mechanisms are implemented.
- */
- static void
- fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg,
- char *path, size_t sz, int *dirfdp)
- {
- if (cur->contents != NULL) {
- size_t n;
- *dirfdp = AT_FDCWD;
- n = strlcpy(path, cur->contents, sz);
- assert(n < sz);
- } else if (cur->root == NULL) {
- size_t n;
- *dirfdp = SLIST_FIRST(&arg->dirs)->dirfd;
- n = strlcpy(path, cur->name, sz);
- assert(n < sz);
- } else {
- int n;
- *dirfdp = AT_FDCWD;
- n = snprintf(path, sz, "%s/%s/%s",
- cur->root, cur->path, cur->name);
- assert(n >= 0);
- assert((size_t)n < sz);
- }
- }
- static int
- fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags)
- {
- char path[PATH_MAX];
- int fd;
- fs_populate_path(cur, arg, path, sizeof(path), &fd);
- fd = openat(fd, path, flags);
- if (fd < 0)
- err(1, "openat(%s)", path);
- return (fd);
- }
- static int
- fs_open_can_fail(const fsnode *cur, struct fs_populate_arg *arg, int flags)
- {
- int fd;
- char path[PATH_MAX];
- fs_populate_path(cur, arg, path, sizeof(path), &fd);
- return (openat(fd, path, flags));
- }
- static void
- fs_readlink(const fsnode *cur, struct fs_populate_arg *arg,
- char *buf, size_t bufsz)
- {
- char path[PATH_MAX];
- int fd;
- if (cur->symlink != NULL) {
- size_t n;
- n = strlcpy(buf, cur->symlink, bufsz);
- assert(n < bufsz);
- } else {
- ssize_t n;
- fs_populate_path(cur, arg, path, sizeof(path), &fd);
- n = readlinkat(fd, path, buf, bufsz - 1);
- if (n == -1)
- err(1, "readlinkat(%s)", cur->name);
- buf[n] = '\0';
- }
- }
- static void
- fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts,
- uint16_t ind, size_t *szp)
- {
- uint64_t timebuf[2];
- assert(ind < fs->sacnt);
- assert(fs->saoffs[ind] != 0xffff);
- assert(fs->satab[ind].size == sizeof(timebuf));
- timebuf[0] = ts->tv_sec;
- timebuf[1] = ts->tv_nsec;
- fs_populate_attr(fs, attrbuf, timebuf, ind, szp);
- }
- static void
- fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur,
- dnode_phys_t *dnode)
- {
- char target[PATH_MAX];
- zfs_fs_t *fs;
- zfs_ace_hdr_t aces[3];
- struct stat *sb;
- sa_hdr_phys_t *sahdr;
- uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid;
- char *attrbuf;
- size_t bonussz, hdrsz;
- int layout;
- assert(dnode->dn_bonustype == DMU_OT_SA);
- assert(dnode->dn_nblkptr == 1);
- fs = arg->fs;
- sb = &cur->inode->st;
- switch (cur->type) {
- case S_IFREG:
- layout = SA_LAYOUT_INDEX_DEFAULT;
- links = cur->inode->nlink;
- objsize = sb->st_size;
- parent = SLIST_FIRST(&arg->dirs)->objid;
- break;
- case S_IFDIR:
- layout = SA_LAYOUT_INDEX_DEFAULT;
- links = 1; /* .. */
- objsize = 1; /* .. */
- /*
- * The size of a ZPL directory is the number of entries
- * (including "." and ".."), and the link count is the number of
- * entries which are directories (including "." and "..").
- */
- for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child;
- c != NULL; c = c->next) {
- if (c->type == S_IFDIR)
- links++;
- objsize++;
- }
- /* The root directory is its own parent. */
- parent = SLIST_EMPTY(&arg->dirs) ?
- arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid;
- break;
- case S_IFLNK:
- fs_readlink(cur, arg, target, sizeof(target));
- layout = SA_LAYOUT_INDEX_SYMLINK;
- links = 1;
- objsize = strlen(target);
- parent = SLIST_FIRST(&arg->dirs)->objid;
- break;
- default:
- assert(0);
- }
- daclcount = nitems(aces);
- flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_ARCHIVE |
- ZFS_AV_MODIFIED;
- gen = 1;
- gid = sb->st_gid;
- mode = sb->st_mode;
- uid = sb->st_uid;
- memset(aces, 0, sizeof(aces));
- aces[0].z_flags = ACE_OWNER;
- aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
- aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER |
- ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL |
- ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
- if ((mode & S_IRUSR) != 0)
- aces[0].z_access_mask |= ACE_READ_DATA;
- if ((mode & S_IWUSR) != 0)
- aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
- if ((mode & S_IXUSR) != 0)
- aces[0].z_access_mask |= ACE_EXECUTE;
- aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP;
- aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
- aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
- ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
- if ((mode & S_IRGRP) != 0)
- aces[1].z_access_mask |= ACE_READ_DATA;
- if ((mode & S_IWGRP) != 0)
- aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
- if ((mode & S_IXGRP) != 0)
- aces[1].z_access_mask |= ACE_EXECUTE;
- aces[2].z_flags = ACE_EVERYONE;
- aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
- aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
- ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
- if ((mode & S_IROTH) != 0)
- aces[2].z_access_mask |= ACE_READ_DATA;
- if ((mode & S_IWOTH) != 0)
- aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
- if ((mode & S_IXOTH) != 0)
- aces[2].z_access_mask |= ACE_EXECUTE;
- switch (layout) {
- case SA_LAYOUT_INDEX_DEFAULT:
- /* At most one variable-length attribute. */
- hdrsz = sizeof(uint64_t);
- break;
- case SA_LAYOUT_INDEX_SYMLINK:
- /* At most five variable-length attributes. */
- hdrsz = sizeof(uint64_t) * 2;
- break;
- default:
- assert(0);
- }
- sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode);
- sahdr->sa_magic = SA_MAGIC;
- SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz);
- bonussz = SA_HDR_SIZE(sahdr);
- attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr);
- fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz);
- fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz);
- fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz);
- fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz);
- fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz);
- fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz);
- fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz);
- fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz);
- fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz);
- /*
- * We deliberately set atime = mtime here to ensure that images are
- * reproducible.
- */
- fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz);
- fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz);
- fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz);
- #ifdef __linux__
- /* Linux has no st_birthtim; approximate with st_ctim */
- fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz);
- #else
- fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz);
- #endif
- fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0,
- ZPL_DACL_ACES, &bonussz);
- sahdr->sa_lengths[0] = sizeof(aces);
- if (cur->type == S_IFLNK) {
- assert(layout == SA_LAYOUT_INDEX_SYMLINK);
- /* Need to use a spill block pointer if the target is long. */
- assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN);
- fs_populate_varszattr(fs, attrbuf, target, objsize,
- sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz);
- sahdr->sa_lengths[1] = (uint16_t)objsize;
- }
- dnode->dn_bonuslen = bonussz;
- }
- static void
- fs_populate_file(fsnode *cur, struct fs_populate_arg *arg)
- {
- struct dnode_cursor *c;
- dnode_phys_t *dnode;
- zfs_opt_t *zfs;
- char *buf;
- uint64_t dnid;
- ssize_t n;
- size_t bufsz;
- off_t nbytes, reqbytes, size;
- int fd;
- assert(cur->type == S_IFREG);
- assert((cur->inode->flags & FI_ROOT) == 0);
- zfs = arg->zfs;
- assert(cur->inode->ino != 0);
- if ((cur->inode->flags & FI_ALLOCATED) != 0) {
- /*
- * This is a hard link of an existing file.
- *
- * XXX-MJ need to check whether it crosses datasets, add a test
- * case for that
- */
- fs_populate_dirent(arg, cur, cur->inode->ino);
- return;
- }
- dnode = objset_dnode_bonus_alloc(arg->fs->os,
- DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
- cur->inode->ino = dnid;
- cur->inode->flags |= FI_ALLOCATED;
- fd = fs_open(cur, arg, O_RDONLY);
- buf = zfs->filebuf;
- bufsz = sizeof(zfs->filebuf);
- size = cur->inode->st.st_size;
- c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0);
- for (off_t foff = 0; foff < size; foff += nbytes) {
- off_t loc, sofar;
- /*
- * Fill up our buffer, handling partial reads.
- */
- sofar = 0;
- nbytes = MIN(size - foff, (off_t)bufsz);
- do {
- n = read(fd, buf + sofar, nbytes);
- if (n < 0)
- err(1, "reading from '%s'", cur->name);
- if (n == 0)
- errx(1, "unexpected EOF reading '%s'",
- cur->name);
- sofar += n;
- } while (sofar < nbytes);
- if (nbytes < (off_t)bufsz)
- memset(buf + nbytes, 0, bufsz - nbytes);
- reqbytes = foff == 0 ? nbytes : MAXBLOCKSIZE;
- loc = objset_space_alloc(zfs, arg->fs->os, &reqbytes);
- vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, reqbytes, loc,
- dnode_cursor_next(zfs, c, foff));
- }
- eclose(fd);
- dnode_cursor_finish(zfs, c);
- fs_populate_sattrs(arg, cur, dnode);
- fs_populate_dirent(arg, cur, dnid);
- }
- static void
- fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg)
- {
- dnode_phys_t *dnode;
- zfs_objset_t *os;
- uint64_t dnid;
- int dirfd;
- assert(cur->type == S_IFDIR);
- assert((cur->inode->flags & FI_ALLOCATED) == 0);
- os = arg->fs->os;
- dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS,
- DMU_OT_SA, 0, &dnid);
- /*
- * Add an entry to the parent directory and open this directory.
- */
- if (!SLIST_EMPTY(&arg->dirs)) {
- fs_populate_dirent(arg, cur, dnid);
- /*
- * We only need the directory fd if we're finding files in
- * it. If it's just there for other directories or
- * files using contents= we don't need to succeed here.
- */
- dirfd = fs_open_can_fail(cur, arg, O_DIRECTORY | O_RDONLY);
- } else {
- arg->rootdirid = dnid;
- dirfd = arg->rootdirfd;
- arg->rootdirfd = -1;
- }
- /*
- * Set ZPL attributes.
- */
- fs_populate_sattrs(arg, cur, dnode);
- /*
- * If this is a root directory, then its children belong to a different
- * dataset and this directory remains empty in the current objset.
- */
- if ((cur->inode->flags & FI_ROOT) == 0) {
- struct fs_populate_dir *dir;
- dir = ecalloc(1, sizeof(*dir));
- dir->dirfd = dirfd;
- dir->objid = dnid;
- dir->zap = zap_alloc(os, dnode);
- SLIST_INSERT_HEAD(&arg->dirs, dir, next);
- } else {
- zap_write(arg->zfs, zap_alloc(os, dnode));
- fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd);
- }
- }
- static void
- fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg)
- {
- dnode_phys_t *dnode;
- uint64_t dnid;
- assert(cur->type == S_IFLNK);
- assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0);
- dnode = objset_dnode_bonus_alloc(arg->fs->os,
- DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
- fs_populate_dirent(arg, cur, dnid);
- fs_populate_sattrs(arg, cur, dnode);
- }
- static int
- fs_foreach_populate(fsnode *cur, void *_arg)
- {
- struct fs_populate_arg *arg;
- struct fs_populate_dir *dir;
- int ret;
- arg = _arg;
- switch (cur->type) {
- case S_IFREG:
- fs_populate_file(cur, arg);
- break;
- case S_IFDIR:
- if (fsnode_isroot(cur))
- break;
- fs_populate_dir(cur, arg);
- break;
- case S_IFLNK:
- fs_populate_symlink(cur, arg);
- break;
- default:
- assert(0);
- }
- ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1;
- if (cur->next == NULL &&
- (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) {
- /*
- * We reached a terminal node in a subtree. Walk back up and
- * write out directories. We're done once we hit the root of a
- * dataset or find a level where we're not on the edge of the
- * tree.
- */
- do {
- dir = SLIST_FIRST(&arg->dirs);
- SLIST_REMOVE_HEAD(&arg->dirs, next);
- zap_write(arg->zfs, dir->zap);
- if (dir->dirfd != -1)
- eclose(dir->dirfd);
- free(dir);
- cur = cur->parent;
- } while (cur != NULL && cur->next == NULL &&
- (cur->inode->flags & FI_ROOT) == 0);
- }
- return (ret);
- }
- static void
- fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index,
- const sa_attr_type_t layout[], size_t sacnt)
- {
- char ti[16];
- assert(sizeof(layout[0]) == 2);
- snprintf(ti, sizeof(ti), "%u", index);
- zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt,
- (const uint8_t *)layout);
- }
- /*
- * Initialize system attribute tables.
- *
- * There are two elements to this. First, we write the zpl_attrs[] and
- * zpl_attr_layout[] tables to disk. Then we create a lookup table which
- * allows us to set file attributes quickly.
- */
- static uint64_t
- fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs)
- {
- zfs_zap_t *sazap, *salzap, *sarzap;
- zfs_objset_t *os;
- dnode_phys_t *saobj, *salobj, *sarobj;
- uint64_t saobjid, salobjid, sarobjid;
- uint16_t offset;
- os = fs->os;
- /*
- * The on-disk tables are stored in two ZAP objects, the registry object
- * and the layout object. Individual attributes are described by
- * entries in the registry object; for example, the value for the
- * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute.
- * The attributes of a file are ordered according to one of the layouts
- * defined in the layout object. The master node object is simply used
- * to locate the registry and layout objects.
- */
- saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid);
- salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid);
- sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid);
- sarzap = zap_alloc(os, sarobj);
- for (size_t i = 0; i < nitems(zpl_attrs); i++) {
- const zfs_sattr_t *sa;
- uint64_t attr;
- attr = 0;
- sa = &zpl_attrs[i];
- SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs);
- zap_add_uint64(sarzap, sa->name, attr);
- }
- zap_write(zfs, sarzap);
- /*
- * Layouts are arrays of indices into the registry. We define two
- * layouts for use by the ZPL, one for non-symlinks and one for
- * symlinks. They are identical except that the symlink layout includes
- * ZPL_SYMLINK as its final attribute.
- */
- salzap = zap_alloc(os, salobj);
- assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK);
- fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT,
- zpl_attr_layout, nitems(zpl_attr_layout) - 1);
- fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK,
- zpl_attr_layout, nitems(zpl_attr_layout));
- zap_write(zfs, salzap);
- sazap = zap_alloc(os, saobj);
- zap_add_uint64(sazap, SA_LAYOUTS, salobjid);
- zap_add_uint64(sazap, SA_REGISTRY, sarobjid);
- zap_write(zfs, sazap);
- /* Sanity check. */
- for (size_t i = 0; i < nitems(zpl_attrs); i++)
- assert(i == zpl_attrs[i].id);
- /*
- * Build the offset table used when setting file attributes. File
- * attributes are stored in the object's bonus buffer; this table
- * provides the buffer offset of attributes referenced by the layout
- * table.
- */
- fs->sacnt = nitems(zpl_attrs);
- fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs));
- for (size_t i = 0; i < fs->sacnt; i++)
- fs->saoffs[i] = 0xffff;
- offset = 0;
- for (size_t i = 0; i < nitems(zpl_attr_layout); i++) {
- uint16_t size;
- assert(zpl_attr_layout[i] < fs->sacnt);
- fs->saoffs[zpl_attr_layout[i]] = offset;
- size = zpl_attrs[zpl_attr_layout[i]].size;
- offset += size;
- }
- fs->satab = zpl_attrs;
- return (saobjid);
- }
- static void
- fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg)
- {
- char *mountpoint, *origmountpoint, *name, *next;
- fsnode *cur, *root;
- uint64_t canmount;
- if (!dsl_dir_has_dataset(dsldir))
- return;
- if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0)
- return;
- mountpoint = dsl_dir_get_mountpoint(zfs, dsldir);
- if (mountpoint == NULL)
- return;
- /*
- * If we were asked to specify a bootfs, set it here.
- */
- if (zfs->bootfs != NULL && strcmp(zfs->bootfs,
- dsl_dir_fullname(dsldir)) == 0) {
- zap_add_uint64(zfs->poolprops, "bootfs",
- dsl_dir_dataset_id(dsldir));
- }
- origmountpoint = mountpoint;
- /*
- * Figure out which fsnode corresponds to our mountpoint.
- */
- root = arg;
- cur = root;
- if (strcmp(mountpoint, zfs->rootpath) != 0) {
- mountpoint += strlen(zfs->rootpath);
- /*
- * Look up the directory in the staged tree. For example, if
- * the dataset's mount point is /foo/bar/baz, we'll search the
- * root directory for "foo", search "foo" for "baz", and so on.
- * Each intermediate name must refer to a directory; the final
- * component need not exist.
- */
- cur = root;
- for (next = name = mountpoint; next != NULL;) {
- for (; *next == '/'; next++)
- ;
- name = strsep(&next, "/");
- for (; cur != NULL && strcmp(cur->name, name) != 0;
- cur = cur->next)
- ;
- if (cur == NULL) {
- if (next == NULL)
- break;
- errx(1, "missing mountpoint directory for `%s'",
- dsl_dir_fullname(dsldir));
- }
- if (cur->type != S_IFDIR) {
- errx(1,
- "mountpoint for `%s' is not a directory",
- dsl_dir_fullname(dsldir));
- }
- if (next != NULL)
- cur = cur->child;
- }
- }
- if (cur != NULL) {
- assert(cur->type == S_IFDIR);
- /*
- * Multiple datasets shouldn't share a mountpoint. It's
- * technically allowed, but it's not clear what makefs should do
- * in that case.
- */
- assert((cur->inode->flags & FI_ROOT) == 0);
- if (cur != root)
- cur->inode->flags |= FI_ROOT;
- assert(cur->inode->param == NULL);
- cur->inode->param = dsldir;
- }
- free(origmountpoint);
- }
- static int
- fs_foreach_mark(fsnode *cur, void *arg)
- {
- uint64_t *countp;
- countp = arg;
- if (cur->type == S_IFDIR && fsnode_isroot(cur))
- return (1);
- if (cur->inode->ino == 0) {
- cur->inode->ino = ++(*countp);
- cur->inode->nlink = 1;
- } else {
- cur->inode->nlink++;
- }
- return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1);
- }
- /*
- * Create a filesystem dataset. More specifically:
- * - create an object set for the dataset,
- * - add required metadata (SA tables, property definitions, etc.) to that
- * object set,
- * - optionally populate the object set with file objects, using "root" as the
- * root directory.
- *
- * "dirfd" is a directory descriptor for the directory referenced by "root". It
- * is closed before returning.
- */
- static void
- fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd)
- {
- struct fs_populate_arg arg;
- zfs_fs_t fs;
- zfs_zap_t *masterzap;
- zfs_objset_t *os;
- dnode_phys_t *deleteq, *masterobj;
- uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid;
- bool fakedroot;
- /*
- * This dataset's mountpoint doesn't exist in the staging tree, or the
- * dataset doesn't have a mountpoint at all. In either case we still
- * need a root directory. Fake up a root fsnode to handle this case.
- */
- fakedroot = root == NULL;
- if (fakedroot) {
- struct stat *stp;
- assert(dirfd == -1);
- root = ecalloc(1, sizeof(*root));
- root->inode = ecalloc(1, sizeof(*root->inode));
- root->name = estrdup(".");
- root->type = S_IFDIR;
- stp = &root->inode->st;
- stp->st_uid = 0;
- stp->st_gid = 0;
- stp->st_mode = S_IFDIR | 0755;
- }
- assert(root->type == S_IFDIR);
- assert(fsnode_isroot(root));
- /*
- * Initialize the object set for this dataset.
- */
- os = objset_alloc(zfs, DMU_OST_ZFS);
- masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid);
- assert(moid == MASTER_NODE_OBJ);
- memset(&fs, 0, sizeof(fs));
- fs.os = os;
- /*
- * Create the ZAP SA layout now since filesystem object dnodes will
- * refer to those attributes.
- */
- saobjid = fs_set_zpl_attrs(zfs, &fs);
- /*
- * Make a pass over the staged directory to detect hard links and assign
- * virtual dnode numbers.
- */
- dnodecount = 1; /* root directory */
- fsnode_foreach(root, fs_foreach_mark, &dnodecount);
- /*
- * Make a second pass to populate the dataset with files from the
- * staged directory. Most of our runtime is spent here.
- */
- arg.rootdirfd = dirfd;
- arg.zfs = zfs;
- arg.fs = &fs;
- SLIST_INIT(&arg.dirs);
- fs_populate_dir(root, &arg);
- assert(!SLIST_EMPTY(&arg.dirs));
- fsnode_foreach(root, fs_foreach_populate, &arg);
- assert(SLIST_EMPTY(&arg.dirs));
- rootdirid = arg.rootdirid;
- /*
- * Create an empty delete queue. We don't do anything with it, but
- * OpenZFS will refuse to mount filesystems that don't have one.
- */
- deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid);
- zap_write(zfs, zap_alloc(os, deleteq));
- /*
- * Populate and write the master node object. This is a ZAP object
- * containing various dataset properties and the object IDs of the root
- * directory and delete queue.
- */
- masterzap = zap_alloc(os, masterobj);
- zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid);
- zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid);
- zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid);
- zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */);
- zap_add_uint64(masterzap, "normalization", 0 /* off */);
- zap_add_uint64(masterzap, "utf8only", 0 /* off */);
- zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */);
- zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */);
- zap_write(zfs, masterzap);
- /*
- * All finished with this object set, we may as well write it now.
- * The DSL layer will sum up the bytes consumed by each dataset using
- * information stored in the object set, so it can't be freed just yet.
- */
- dsl_dir_dataset_write(zfs, os, dsldir);
- if (fakedroot) {
- free(root->inode);
- free(root->name);
- free(root);
- }
- free(fs.saoffs);
- }
- /*
- * Create an object set for each DSL directory which has a dataset and doesn't
- * already have an object set.
- */
- static void
- fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused)
- {
- if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir))
- fs_build_one(zfs, dsldir, NULL, -1);
- }
- /*
- * Create our datasets and populate them with files.
- */
- void
- fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root)
- {
- /*
- * Run through our datasets and find the root fsnode for each one. Each
- * root fsnode is flagged so that we can figure out which dataset it
- * belongs to.
- */
- dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root);
- /*
- * Did we find our boot filesystem?
- */
- if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs"))
- errx(1, "no mounted dataset matches bootfs property `%s'",
- zfs->bootfs);
- /*
- * Traverse the file hierarchy starting from the root fsnode. One
- * dataset, not necessarily the root dataset, must "own" the root
- * directory by having its mountpoint be equal to the root path.
- *
- * As roots of other datasets are encountered during the traversal,
- * fs_build_one() recursively creates the corresponding object sets and
- * populates them. Once this function has returned, all datasets will
- * have been fully populated.
- */
- fs_build_one(zfs, root->inode->param, root, dirfd);
- /*
- * Now create object sets for datasets whose mountpoints weren't found
- * in the staging directory, either because there is no mountpoint, or
- * because the mountpoint doesn't correspond to an existing directory.
- */
- dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL);
- }
|