123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630 |
- /*
- * fs/logfs/logfs_abi.h
- *
- * As should be obvious for Linux kernel code, license is GPLv2
- *
- * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
- *
- * Public header for logfs.
- */
- #ifndef FS_LOGFS_LOGFS_ABI_H
- #define FS_LOGFS_LOGFS_ABI_H
- /*
- * For out-of-kernel compiles: if the including environment does not provide
- * BUILD_BUG_ON, define it as an empty macro so that the checks below expand
- * to nothing.
- *
- * SIZE_CHECK(type, size) defines a static inline function check_<type>() whose
- * only statement is BUILD_BUG_ON(sizeof(struct type) != (size)). It is placed
- * after each on-medium structure together with the expected size in bytes.
- * The generated function is never called from this header.
- */
- #ifndef BUILD_BUG_ON
- #define BUILD_BUG_ON(condition) /**/
- #endif
- #define SIZE_CHECK(type, size) \
- static inline void check_##type(void) \
- { \
- BUILD_BUG_ON(sizeof(struct type) != (size)); \
- }
- /*
- * Throughout the logfs code, we're constantly dealing with blocks at
- * various positions or offsets. To remove confusion, we strictly
- * distinguish between a "position" - the logical position within a
- * file and an "offset" - the physical location within the device.
- *
- * Any usage of the term offset for a logical location or position for
- * a physical one is a bug and should get fixed.
- */
- /*
- * Blocks are allocated in one of several segments depending on their
- * level. The following levels are used:
- * 0 - regular data block
- * 1 - i1 indirect blocks
- * 2 - i2 indirect blocks
- * 3 - i3 indirect blocks
- * 4 - i4 indirect blocks
- * 5 - i5 indirect blocks
- * 6 - ifile data blocks
- * 7 - ifile i1 indirect blocks
- * 8 - ifile i2 indirect blocks
- * 9 - ifile i3 indirect blocks
- * 10 - ifile i4 indirect blocks
- * 11 - ifile i5 indirect blocks
- * Potential levels to be used in the future:
- * 12 - gc recycled blocks, long-lived data
- * 13 - replacement blocks, short-lived data
- *
- * Levels 1-11 are necessary for robust gc operations and help separate
- * short-lived metadata from longer-lived file data. In the future,
- * file data should get separated into several segments based on simple
- * heuristics. Old data recycled during gc operation is expected to be
- * long-lived. New data is of uncertain life expectancy. New data
- * used to replace older blocks in existing files is expected to be
- * short-lived.
- */
- /*
- * Magic numbers.
- * LOGFS_MAGIC - 64bit, for the superblock (ds_magic must equal it)
- * LOGFS_MAGIC_U32 - 32bit, for statfs f_type
- */
- #define LOGFS_MAGIC 0x7a3a8e5cb9d5bf67ull
- #define LOGFS_MAGIC_U32 0xc97e8168u
- /*
- * Various blocksize related macros. Blocksize is currently fixed at 4KiB.
- * Sooner or later that should become configurable and the macros replaced
- * by something superblock-dependent. Pointers in indirect blocks are and
- * will remain 64bit.
- *
- * LOGFS_BLOCKSIZE - self-explaining
- * LOGFS_BLOCK_FACTOR - number of pointers per indirect block
- * (4096 / 8 = 512 with 64bit pointers)
- * LOGFS_BLOCK_BITS - log2 of LOGFS_BLOCK_FACTOR, used for shifts
- * (512 == 1 << 9); this is a literal, so it has to
- * be kept in sync by hand if the blocksize changes
- */
- #define LOGFS_BLOCKSIZE (4096ull)
- #define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64))
- #define LOGFS_BLOCK_BITS (9)
- /*
- * Number of blocks at various levels of indirection. There are 16 direct
- * block pointers plus a single indirect pointer.
- *
- * I0_BLOCKS - number of direct block pointers
- * I1_BLOCKS..I5_BLOCKS - I1_BLOCKS equals LOGFS_BLOCK_FACTOR, every further
- * level is LOGFS_BLOCK_FACTOR times the previous one
- * INDIRECT_INDEX - same value as I0_BLOCKS; presumably the array index
- * of the indirect pointer, which follows the direct
- * pointers - TODO confirm against the users
- * LOGFS_EMBEDDED_FIELDS - I0_BLOCKS + 1 (= 17); used in this header as the
- * length of the di_data and da_data arrays
- */
- #define I0_BLOCKS (16)
- #define I1_BLOCKS LOGFS_BLOCK_FACTOR
- #define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS)
- #define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS)
- #define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS)
- #define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS)
- #define INDIRECT_INDEX I0_BLOCKS
- #define LOGFS_EMBEDDED_FIELDS (I0_BLOCKS + 1)
- /*
- * Sizes at which files require another level of indirection. Files smaller
- * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
- * similar to ext2 fast symlinks.
- *
- * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
- * direct pointers, else through the 1x indirect pointer and so forth.
- *
- * LOGFS_EMBEDDED_SIZE is LOGFS_EMBEDDED_FIELDS 64bit words, i.e.
- * 17 * 8 = 136 bytes. Every LOGFS_Ix_SIZE is the matching Ix_BLOCKS count
- * multiplied by LOGFS_BLOCKSIZE.
- */
- #define LOGFS_EMBEDDED_SIZE (LOGFS_EMBEDDED_FIELDS * sizeof(u64))
- #define LOGFS_I0_SIZE (I0_BLOCKS * LOGFS_BLOCKSIZE)
- #define LOGFS_I1_SIZE (I1_BLOCKS * LOGFS_BLOCKSIZE)
- #define LOGFS_I2_SIZE (I2_BLOCKS * LOGFS_BLOCKSIZE)
- #define LOGFS_I3_SIZE (I3_BLOCKS * LOGFS_BLOCKSIZE)
- #define LOGFS_I4_SIZE (I4_BLOCKS * LOGFS_BLOCKSIZE)
- #define LOGFS_I5_SIZE (I5_BLOCKS * LOGFS_BLOCKSIZE)
- /*
- * Each indirect block pointer must have this flag set, if all block pointers
- * behind it are set, i.e. there is no hole hidden in the shadow of this
- * indirect block pointer.
- *
- * The flag is the most significant bit (bit 63) of the 64bit pointer value.
- */
- #define LOGFS_FULLY_POPULATED (1ULL << 63)
- #define pure_ofs(ofs) ((ofs) & ~LOGFS_FULLY_POPULATED) /* clears the LOGFS_FULLY_POPULATED bit; argument parenthesized so expressions such as a | b are masked as a whole */
- /*
- * LogFS needs to separate data into levels. Each level is defined as the
- * maximal possible distance from the master inode (inode of the inode file).
- * Data blocks reside on level 0, 1x indirect block on level 1, etc.
- * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
- * This effort is necessary to guarantee garbage collection to always make
- * progress.
- *
- * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks (5),
- * LOGFS_MAX_LEVELS is one more for the actual data level of a file (6). It is
- * the maximal number of levels for one file.
- * LOGFS_NO_AREAS is twice that (12), as the inode file and regular files are
- * effectively stacked on top of each other.
- */
- #define LOGFS_MAX_INDIRECT (5)
- #define LOGFS_MAX_LEVELS (LOGFS_MAX_INDIRECT + 1)
- #define LOGFS_NO_AREAS (2 * LOGFS_MAX_LEVELS)
- /* Maximum size of filenames; also the size of name[] in struct logfs_disk_dentry */
- #define LOGFS_MAX_NAMELEN (255)
- /* Number of segments in the primary journal; length of ds_journal_seg[] in struct logfs_disk_super */
- #define LOGFS_JOURNAL_SEGS (16)
- /* Maximum number of free/erased/etc. segments in journal entries */
- #define MAX_CACHED_SEGS (64)
- /*
- * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store
- * (0x1c = 28 bytes, the size checked for struct logfs_object_header),
- * LOGFS_SEGMENT_HEADERSIZE is the size of the per-segment header
- * (0x18 = 24 bytes, the size checked for struct logfs_segment_header),
- * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
- * its header,
- * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
- * its segment header and the padded space at the end when no further objects
- * fit.
- */
- #define LOGFS_OBJECT_HEADERSIZE (0x1c)
- #define LOGFS_SEGMENT_HEADERSIZE (0x18)
- #define LOGFS_MAX_OBJECTSIZE (LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
- #define LOGFS_SEGMENT_RESERVE \
- (LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)
- /*
- * Segment types, stored in the type field of struct logfs_segment_header:
- * SEG_SUPER - segment used for the superblock
- * SEG_JOURNAL - segment used for the journal
- * SEG_OSTORE - segment used for the object store
- *
- * NOTE(review): the descriptions above follow the constant names; the text
- * that used to be here was a copy of the object type list. Confirm against
- * the code that writes segment headers.
- */
- enum {
- SEG_SUPER = 0x01,
- SEG_JOURNAL = 0x02,
- SEG_OSTORE = 0x03,
- };
- /**
- * struct logfs_segment_header - per-segment header in the ostore
- *
- * @crc: crc32 of header (there is no data)
- * @pad: unused, must be 0
- * @type: segment type, see above
- * @level: GC level for all objects in this segment
- * @segno: segment number
- * @ec: erase count for this segment
- * @gec: global erase count at time of writing
- *
- * Multi-byte fields are declared big-endian (__be16/__be32/__be64). The
- * fields add up to 4 + 2 + 1 + 1 + 4 + 4 + 8 = 24 bytes, which is the value of
- * LOGFS_SEGMENT_HEADERSIZE used in the size check below.
- */
- struct logfs_segment_header {
- __be32 crc;
- __be16 pad;
- __u8 type;
- __u8 level;
- __be32 segno;
- __be32 ec;
- __be64 gec;
- };
- SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
- #define LOGFS_FEATURES_INCOMPAT (0ull) /* empty mask; presumably the counterpart of ds_feature_incompat - TODO confirm */
- #define LOGFS_FEATURES_RO_COMPAT (0ull) /* empty mask; presumably the counterpart of ds_feature_ro_compat - TODO confirm */
- #define LOGFS_FEATURES_COMPAT (0ull) /* empty mask; presumably the counterpart of ds_feature_compat - TODO confirm */
- /**
- * struct logfs_disk_super - on-medium superblock
- *
- * @ds_sh: segment header preceding the superblock fields
- * @ds_magic: magic number, must equal LOGFS_MAGIC
- * @ds_crc: crc32 of structure starting with the next field
- * @ds_ifile_levels: maximum number of levels for ifile
- * @ds_iblock_levels: maximum number of levels for regular files
- * @ds_data_levels: number of separate levels for data
- * @ds_segment_shift: log2 of segment size
- * @ds_block_shift: log2 of block size
- * @ds_write_shift: log2 of write size
- * @pad0: reserved, must be 0
- * @ds_filesystem_size: size of the filesystem (presumably in bytes - TODO
- * confirm)
- * @ds_segment_size: size of a segment (presumably in bytes - TODO confirm)
- * @ds_bad_seg_reserve: number of segments reserved to handle bad blocks
- * @ds_feature_incompat: incompatible filesystem features
- * @ds_feature_ro_compat: read-only compatible filesystem features
- * @ds_feature_compat: compatible filesystem features
- * @ds_feature_flags: flags
- * @ds_root_reserve: bytes reserved for the superuser
- * @ds_speed_reserve: bytes reserved to speed up GC
- * @ds_journal_seg: segments used by primary journal
- * @ds_super_ofs: two 64bit values; presumably the offsets of the
- * superblock copies - TODO confirm
- * @pad3: reserved, must be 0
- *
- * Contains only read-only fields. Read-write fields like the amount of used
- * space is tracked in the dynamic superblock, which is stored in the journal.
- *
- * The fields add up to 256 bytes, the value used in the size check below.
- */
- struct logfs_disk_super {
- struct logfs_segment_header ds_sh;
- __be64 ds_magic;
- __be32 ds_crc;
- __u8 ds_ifile_levels;
- __u8 ds_iblock_levels;
- __u8 ds_data_levels;
- __u8 ds_segment_shift;
- __u8 ds_block_shift;
- __u8 ds_write_shift;
- __u8 pad0[6];
- __be64 ds_filesystem_size;
- __be32 ds_segment_size;
- __be32 ds_bad_seg_reserve;
- __be64 ds_feature_incompat;
- __be64 ds_feature_ro_compat;
- __be64 ds_feature_compat;
- __be64 ds_feature_flags;
- __be64 ds_root_reserve;
- __be64 ds_speed_reserve;
- __be32 ds_journal_seg[LOGFS_JOURNAL_SEGS];
- __be64 ds_super_ofs[2];
- __be64 pad3[8];
- };
- SIZE_CHECK(logfs_disk_super, 256);
- /*
- * Object types, stored in the type field of struct logfs_object_header:
- * OBJ_BLOCK - Data or indirect block
- * OBJ_INODE - Inode
- * OBJ_DENTRY - Dentry
- *
- * The values (0x04..0x06) do not overlap with the segment types (0x01..0x03).
- */
- enum {
- OBJ_BLOCK = 0x04,
- OBJ_INODE = 0x05,
- OBJ_DENTRY = 0x06,
- };
- /**
- * struct logfs_object_header - per-object header in the ostore
- *
- * @crc: crc32 of header, excluding data_crc
- * @len: length of data
- * @type: object type, see above
- * @compr: compression type
- * @ino: inode number
- * @bix: block index
- * @data_crc: crc32 of payload
- *
- * The fields add up to 4 + 2 + 1 + 1 + 8 + 8 + 4 = 28 bytes, the value of
- * LOGFS_OBJECT_HEADERSIZE. The structure is declared packed; 28 is not a
- * multiple of 8, so on ABIs that align 64bit members to 8 bytes an unpacked
- * layout would get tail padding.
- */
- struct logfs_object_header {
- __be32 crc;
- __be16 len;
- __u8 type;
- __u8 compr;
- __be64 ino;
- __be64 bix;
- __be32 data_crc;
- } __attribute__((packed));
- SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);
- /*
- * Reserved inode numbers:
- * LOGFS_INO_MAPPING - inode number 0; its purpose is not visible in this
- * header
- * LOGFS_INO_MASTER - master inode (for inode file)
- * LOGFS_INO_ROOT - root directory
- * LOGFS_INO_SEGFILE - per-segment used bytes and erase count
- * LOGFS_RESERVED_INOS - 0x10; presumably the number of reserved inode
- * numbers, i.e. the first one available for regular
- * use - TODO confirm
- */
- enum {
- LOGFS_INO_MAPPING = 0x00,
- LOGFS_INO_MASTER = 0x01,
- LOGFS_INO_ROOT = 0x02,
- LOGFS_INO_SEGFILE = 0x03,
- LOGFS_RESERVED_INOS = 0x10,
- };
- /*
- * Inode flags. High bits should never be written to the medium. They are
- * reserved for in-memory usage.
- * Low bits should either remain in sync with the corresponding FS_*_FL or
- * reuse slots that obviously don't make sense for logfs.
- *
- * LOGFS_IF_COMPRESSED Same value as FS_COMPR_FL; the only low bit defined
- * LOGFS_IF_DIRTY Inode must be written back
- * LOGFS_IF_ZOMBIE Inode has been deleted
- * LOGFS_IF_STILLBORN -ENOSPC happened when creating inode
- */
- #define LOGFS_IF_COMPRESSED 0x00000004 /* == FS_COMPR_FL */
- #define LOGFS_IF_DIRTY 0x20000000
- #define LOGFS_IF_ZOMBIE 0x40000000
- #define LOGFS_IF_STILLBORN 0x80000000
- /* Flags available to chattr; currently only LOGFS_IF_COMPRESSED */
- #define LOGFS_FL_USER_VISIBLE (LOGFS_IF_COMPRESSED)
- #define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
- /* Flags inherited from parent directory on file/directory creation */
- #define LOGFS_FL_INHERITED (LOGFS_IF_COMPRESSED)
- /**
- * struct logfs_disk_inode - on-medium inode
- *
- * @di_mode: file mode
- * @di_height: presumably the height of the indirect block tree - TODO
- * confirm
- * @di_pad: reserved, must be 0
- * @di_flags: inode flags, see above
- * @di_uid: user id
- * @di_gid: group id
- * @di_ctime: change time
- * @di_mtime: modify time
- * @di_atime: access time
- * @di_refcount: reference count (aka nlink or link count)
- * @di_generation: inode generation, for nfs
- * @di_used_bytes: number of bytes used
- * @di_size: file size
- * @di_data: data pointers (LOGFS_EMBEDDED_FIELDS entries)
- *
- * The fields before di_data take 64 bytes and di_data takes 17 * 8 = 136
- * bytes, giving the 200 bytes used in the size check below.
- */
- struct logfs_disk_inode {
- __be16 di_mode;
- __u8 di_height;
- __u8 di_pad;
- __be32 di_flags;
- __be32 di_uid;
- __be32 di_gid;
- __be64 di_ctime;
- __be64 di_mtime;
- __be64 di_atime;
- __be32 di_refcount;
- __be32 di_generation;
- __be64 di_used_bytes;
- __be64 di_size;
- __be64 di_data[LOGFS_EMBEDDED_FIELDS];
- };
- SIZE_CHECK(logfs_disk_inode, 200);
- #define INODE_POINTER_OFS \
- (offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64)) /* position of di_data counted in 64bit words; 64 / 8 = 8 */
- #define INODE_USED_OFS \
- (offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64)) /* position of di_used_bytes counted in 64bit words; 48 / 8 = 6 */
- #define INODE_SIZE_OFS \
- (offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64)) /* position of di_size counted in 64bit words; 56 / 8 = 7 */
- #define INODE_HEIGHT_OFS (0) /* literal 0; presumably the 64bit word containing di_height - TODO confirm */
- /**
- * struct logfs_disk_dentry - on-medium dentry structure
- *
- * @ino: inode number
- * @namelen: length of file name
- * @type: file type, identical to bits 12..15 of mode
- * @name: file name (LOGFS_MAX_NAMELEN bytes)
- *
- * The fields add up to 8 + 2 + 1 + 255 = 266 bytes, the value used in the
- * size check below; the structure is declared packed.
- */
- /* FIXME: add 6 bytes of padding to remove the __packed */
- struct logfs_disk_dentry {
- __be64 ino;
- __be16 namelen;
- __u8 type;
- __u8 name[LOGFS_MAX_NAMELEN];
- } __attribute__((packed));
- SIZE_CHECK(logfs_disk_dentry, 266);
- #define RESERVED 0xffffffff /* value of valid for superblock, journal and bad segments */
- #define BADSEG 0xffffffff /* value of ec_level identifying a bad segment */
- /**
- * struct logfs_segment_entry - segment file entry
- *
- * @ec_level: erase count and level
- * @valid: number of valid bytes
- *
- * Segment file contains one entry for every segment. ec_level contains the
- * erasecount in the upper 28 bits and the level in the lower 4 bits. An
- * ec_level of BADSEG (-1) identifies bad segments. valid contains the number
- * of valid bytes or RESERVED (-1 again) if the segment is used for either the
- * superblock or the journal, or when the segment is bad.
- *
- * RESERVED and BADSEG share the same numeric value but apply to different
- * fields.
- */
- struct logfs_segment_entry {
- __be32 ec_level;
- __be32 valid;
- };
- SIZE_CHECK(logfs_segment_entry, 8);
- /**
- * struct logfs_journal_header - header for journal entries (JEs)
- *
- * @h_crc: crc32 of journal entry
- * @h_len: length of compressed journal entry,
- * not including header
- * @h_datalen: length of uncompressed data
- * @h_type: JE type
- * @h_compr: compression type
- * @h_pad: reserved; 5 bytes that bring the header to 16 bytes
- */
- struct logfs_journal_header {
- __be32 h_crc;
- __be16 h_len;
- __be16 h_datalen;
- __be16 h_type;
- __u8 h_compr;
- __u8 h_pad[5];
- };
- SIZE_CHECK(logfs_journal_header, 16);
- /*
- * Life expectancy of data.
- * VIM_DEFAULT - default vim
- * VIM_SEGFILE - for segment file only - very short-living
- * VIM_GC - GC'd data - likely long-living
- * (NOTE(review): no VIM_GC constant is defined in this enum)
- */
- enum logfs_vim {
- VIM_DEFAULT = 0,
- VIM_SEGFILE = 1,
- };
- /**
- * struct logfs_je_area - wbuf header
- *
- * @segno: segment number of area
- * @used_bytes: number of bytes already used
- * @gc_level: GC level
- * @vim: life expectancy of data
- *
- * "Areas" are segments currently being used for writing. There is at least
- * one area per GC level. Several may be used to separate long-living from
- * short-living data. If an area with unknown vim is encountered, it can
- * simply be closed.
- * The write buffer immediately follows this header.
- *
- * The structure is declared packed and is 4 + 4 + 1 + 1 = 10 bytes long.
- */
- struct logfs_je_area {
- __be32 segno;
- __be32 used_bytes;
- __u8 gc_level;
- __u8 vim;
- } __attribute__((packed));
- SIZE_CHECK(logfs_je_area, 10);
- #define MAX_JOURNAL_HEADER \
- (sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area)) /* 16 + 10 = 26 bytes */
- /**
- * struct logfs_je_dynsb - dynamic superblock
- *
- * @ds_gec: global erase count
- * @ds_sweeper: current position of GC "sweeper"
- * @ds_rename_dir: source directory ino (see dir.c documentation)
- * @ds_rename_pos: position of source dd (see dir.c documentation)
- * @ds_victim_ino: victims of incomplete dir operation (see dir.c)
- * @ds_victim_parent: parent inode of victim (see dir.c)
- * @ds_used_bytes: number of used bytes
- * @ds_generation: not documented here; presumably a generation counter -
- * TODO confirm
- * @pad: padding that brings the structure to 64 bytes
- */
- struct logfs_je_dynsb {
- __be64 ds_gec;
- __be64 ds_sweeper;
- __be64 ds_rename_dir;
- __be64 ds_rename_pos;
- __be64 ds_victim_ino;
- __be64 ds_victim_parent; /* XXX */
- __be64 ds_used_bytes;
- __be32 ds_generation;
- __be32 pad;
- };
- SIZE_CHECK(logfs_je_dynsb, 64);
- /**
- * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
- *
- * @da_size: size of inode file
- * @da_last_ino: last created inode
- * @da_used_bytes: number of bytes used
- * @da_height: presumably the height of the inode file's indirect
- * block tree - TODO confirm
- * @pad: 7 bytes of padding up to the next 64bit boundary
- * @da_data: data pointers (LOGFS_EMBEDDED_FIELDS entries)
- *
- * The single-byte fields use __u8, like every other on-medium structure in
- * this header. The fields add up to 24 + 1 + 7 + 17 * 8 = 168 bytes.
- */
- struct logfs_je_anchor {
- __be64 da_size;
- __be64 da_last_ino;
- __be64 da_used_bytes;
- __u8 da_height;
- __u8 pad[7];
- __be64 da_data[LOGFS_EMBEDDED_FIELDS];
- };
- SIZE_CHECK(logfs_je_anchor, 168);
- /**
- * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
- *
- * @so_segment: segments used for 2nd journal
- *
- * Length of the array is given by h_len field in the header.
- *
- * so_segment is declared with length 0, so the structure itself has size 0
- * (see the size check below) and only describes the element type.
- */
- struct logfs_je_spillout {
- __be64 so_segment[0];
- };
- SIZE_CHECK(logfs_je_spillout, 0);
- /**
- * struct logfs_je_journal_ec - erase counts for all journal segments
- *
- * @ec: erase count
- *
- * Length of the array is given by h_len field in the header.
- *
- * ec is declared with length 0, so the structure itself has size 0 (see the
- * size check below) and only describes the element type.
- */
- struct logfs_je_journal_ec {
- __be32 ec[0];
- };
- SIZE_CHECK(logfs_je_journal_ec, 0);
- /**
- * struct logfs_je_free_segments - list of free segments with erase count
- *
- * @segno: segment number
- * @ec: erase count
- *
- * One 8 byte element of the list.
- */
- struct logfs_je_free_segments {
- __be32 segno;
- __be32 ec;
- };
- SIZE_CHECK(logfs_je_free_segments, 8);
- /**
- * struct logfs_seg_alias - list of segment aliases
- *
- * @old_segno: presumably the segment number being aliased - TODO confirm
- * @new_segno: presumably the segment number it maps to - TODO confirm
- *
- * One 8 byte element of the list.
- */
- struct logfs_seg_alias {
- __be32 old_segno;
- __be32 new_segno;
- };
- SIZE_CHECK(logfs_seg_alias, 8);
- /**
- * struct logfs_obj_alias - list of object aliases
- *
- * @ino: presumably the inode number, as in struct
- * logfs_object_header - TODO confirm
- * @bix: presumably the block index, as in struct
- * logfs_object_header - TODO confirm
- * @val: not documented in this header
- * @level: not documented in this header; presumably a level as
- * described at the top of this file - TODO confirm
- * @pad: 5 bytes of padding
- * @child_no: not documented in this header
- *
- * The single-byte fields use __u8, like every other on-medium structure in
- * this header. The fields add up to 24 + 1 + 5 + 2 = 32 bytes.
- */
- struct logfs_obj_alias {
- __be64 ino;
- __be64 bix;
- __be64 val;
- __u8 level;
- __u8 pad[5];
- __be16 child_no;
- };
- SIZE_CHECK(logfs_obj_alias, 32);
- /**
- * Compression types.
- *
- * COMPR_NONE - uncompressed
- * COMPR_ZLIB - compressed with zlib
- *
- * Presumably the values stored in the compr field of struct
- * logfs_object_header and the h_compr field of struct logfs_journal_header,
- * both documented as "compression type" - TODO confirm.
- */
- enum {
- COMPR_NONE = 0,
- COMPR_ZLIB = 1,
- };
- /*
- * Journal entries come in groups of 16. First group contains unique
- * entries, next groups contain one entry per level
- *
- * JE_FIRST - smallest possible journal entry number
- *
- * JEG_BASE - base group, containing unique entries
- * JE_COMMIT - commit entry, validates all previous entries
- * JE_DYNSB - dynamic superblock, anything that ought to be in the
- * superblock but cannot because it is read-write data
- * JE_ANCHOR - anchor aka master inode aka inode file's inode
- * JE_ERASECOUNT erasecounts for all journal segments
- * JE_SPILLOUT - unused
- * JE_OBJ_ALIAS - object aliases (presumably entries of struct
- * logfs_obj_alias - TODO confirm)
- * JE_AREA - area description
- *
- * JE_LAST - largest possible journal entry number
- *
- * NOTE(review): earlier text listed a JE_SEG_ALIAS entry ("aliases
- * segments"); no such constant is defined in this enum. JE_FIRST is 0x01
- * although the first named entry, JE_COMMIT, is 0x02.
- */
- enum {
- JE_FIRST = 0x01,
- JEG_BASE = 0x00,
- JE_COMMIT = 0x02,
- JE_DYNSB = 0x03,
- JE_ANCHOR = 0x04,
- JE_ERASECOUNT = 0x05,
- JE_SPILLOUT = 0x06,
- JE_OBJ_ALIAS = 0x0d,
- JE_AREA = 0x0e,
- JE_LAST = 0x0e,
- };
- #endif
|