From f5fed9ff093ac57967a4c5253bae3ae294dd8cd9 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Thu, 23 Mar 2023 10:50:34 +0100 Subject: [PATCH] Initial version of composefs-fuse Signed-off-by: Alexander Larsson --- configure.ac | 15 + hacking/installdeps.sh | 2 +- tools/Makefile.am | 10 +- tools/cfs-fuse.c | 1236 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1261 insertions(+), 2 deletions(-) create mode 100644 tools/cfs-fuse.c diff --git a/configure.ac b/configure.ac index 407f5f34..9fcbfce8 100644 --- a/configure.ac +++ b/configure.ac @@ -176,6 +176,21 @@ else fi AM_CONDITIONAL([USE_YAJL],[test "$have_libyajl" = "yes"]) +AC_ARG_WITH(fuse, + AS_HELP_STRING([--with-fuse], [Build fuse support [default=auto]]), + , with_fuse=auto) +if test "x$with_fuse" != "xno"; then + PKG_CHECK_MODULES(FUSE3, [fuse3 >= 3.10.0], [have_fuse3=yes], [have_fuse3=no]) + if test $have_fuse3 = yes; then + AC_DEFINE(HAVE_FUSE3, 1, [Define if fuse3 is available]) + elif test "x$with_fuse" == "xyes"; then + AC_MSG_ERROR([fuse was requested but fuse3 it could not be found]) + fi +else + have_fuse3=no +fi +AM_CONDITIONAL([USE_FUSE3],[test "$have_fuse3" = "yes"]) + AC_FUNC_ERROR_AT_LINE AC_FUNC_FSEEKO AC_HEADER_MAJOR diff --git a/hacking/installdeps.sh b/hacking/installdeps.sh index b2e930b4..d4c70b87 100755 --- a/hacking/installdeps.sh +++ b/hacking/installdeps.sh @@ -1,4 +1,4 @@ #!/bin/bash set -xeuo pipefail export DEBIAN_FRONTEND=noninteractive -apt-get install -y automake libtool autoconf autotools-dev git make gcc libyajl-dev libssl-dev libfsverity-dev pkg-config +apt-get install -y automake libtool autoconf autotools-dev git make gcc libyajl-dev libssl-dev libfsverity-dev pkg-config libfuse3-dev diff --git a/tools/Makefile.am b/tools/Makefile.am index 0c0bf3a4..4bd8cc0f 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,11 +1,15 @@ bin_PROGRAMS = mkcomposefs sbin_PROGRAMS = mount.composefs -noinst_PROGRAMS = +noinst_PROGRAMS = if USE_YAJL noinst_PROGRAMS += composefs-from-json endif +if USE_FUSE3 +noinst_PROGRAMS += composefs-fuse +endif + AM_CFLAGS = $(WARN_CFLAGS) -I$(top_srcdir)/ mkcomposefs_SOURCES = mkcomposefs.c @@ -17,6 +21,10 @@ mount_composefs_LDADD = ../libcomposefs/libcomposefs.la $(LIBCRYPTO_LIBS) composefs_from_json_SOURCES = composefs-from-json.c read-file.c read-file.h composefs_from_json_LDADD = ../libcomposefs/libcomposefs.la $(LIBS_YAJL) $(LIBCRYPTO_LIBS) $(LIBS_SECCOMP) +composefs_fuse_SOURCES = cfs-fuse.c +composefs_fuse_LDADD = ../libcomposefs/libcomposefs.la $(FUSE3_LIBS) +composefs_fuse_CFLAGS = $(FUSE3_CFLAGS) + TEST_ASSETS_SMALL = \ config.json.gz config-with-hard-link.json.gz diff --git a/tools/cfs-fuse.c b/tools/cfs-fuse.c new file mode 100644 index 00000000..b2c866ac --- /dev/null +++ b/tools/cfs-fuse.c @@ -0,0 +1,1236 @@ +#define _GNU_SOURCE + +#define FUSE_USE_VERSION 34 + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libcomposefs/lcfs-erofs.h" +#include "libcomposefs/lcfs-internal.h" + +/* TODO: + * Do we want to user ther negative_timeout=T option? + */ + +#define CFS_ENTRY_TIMEOUT 3600.0 +#define CFS_ATTR_TIMEOUT 3600.0 + +#define ALIGN_TO(_offset, _align_size) \ + (((_offset) + _align_size - 1) & ~(_align_size - 1)) + +/* Note: These only do power of 2 */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#include "libcomposefs/erofs_fs_wrapper.h" + +const uint8_t *erofs_data; +size_t erofs_data_size; +uint64_t erofs_root_nid; +bool erofs_use_acl; +const struct erofs_super_block *erofs_super; +const struct lcfs_erofs_header_s *cfs_header; +const uint8_t *erofs_metadata; +const uint8_t *erofs_xattrdata; +uint64_t erofs_build_time; +uint32_t erofs_build_time_nsec; +int basedir_fd; + +static void printexit(const char *format, ...) +{ + va_list args; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +struct cfs_data { + const char *source; + const char *basedir; + bool noacl; +}; + +static const struct fuse_opt cfs_opts[] = { + { "source=%s", offsetof(struct cfs_data, source), 0 }, + { "basedir=%s", offsetof(struct cfs_data, basedir), 0 }, + { "noacl", offsetof(struct cfs_data, noacl), 1 }, + FUSE_OPT_END +}; + +typedef union { + __le16 i_format; + struct erofs_inode_compact compact; + struct erofs_inode_extended extended; +} erofs_inode; + +static uint64_t cfs_nid_from_ino(fuse_ino_t ino) +{ + if (ino == FUSE_ROOT_ID) { + return erofs_root_nid; + } + return ino; +} + +static fuse_ino_t cfs_ino_from_nid(uint64_t nid) +{ + if (nid == erofs_root_nid) { + return FUSE_ROOT_ID; + } + return nid; +} + +static const erofs_inode *cfs_get_erofs_inode(fuse_ino_t ino) +{ + uint64_t nid = cfs_nid_from_ino(ino); + + /* TODO: Add bounds check */ + + return (const erofs_inode *)(erofs_metadata + (nid << EROFS_ISLOTBITS)); +} + +static uint16_t erofs_inode_version(const erofs_inode *cino) +{ + uint16_t i_format = lcfs_u16_from_file(cino->i_format); + return (i_format & ((1 << EROFS_I_VERSION_BITS) - 1)) >> EROFS_I_VERSION_BIT; +} + +static bool erofs_inode_is_compact(const erofs_inode *cino) +{ + return erofs_inode_version(cino) == 0; +} + +static uint16_t erofs_inode_datalayout(const erofs_inode *cino) +{ + uint16_t i_format = lcfs_u16_from_file(cino->i_format); + return (i_format & ((1 << EROFS_I_DATALAYOUT_BITS) - 1)) >> + EROFS_I_DATALAYOUT_BIT; +} + +static bool erofs_inode_is_tailpacked(const erofs_inode *cino) +{ + return erofs_inode_datalayout(cino) == EROFS_INODE_FLAT_INLINE; +} + +static int cfs_stat(fuse_ino_t ino, const erofs_inode *cino, struct stat *stbuf) +{ + stbuf->st_ino = ino; + + if (erofs_inode_is_compact(cino)) { + const struct erofs_inode_compact *c = &cino->compact; + + stbuf->st_mode = lcfs_u16_from_file(c->i_mode); + stbuf->st_nlink = lcfs_u16_from_file(c->i_nlink); + stbuf->st_size = lcfs_u32_from_file(c->i_size); + stbuf->st_uid = lcfs_u16_from_file(c->i_uid); + stbuf->st_gid = lcfs_u16_from_file(c->i_gid); + + stbuf->st_mtim.tv_sec = erofs_build_time; + stbuf->st_mtim.tv_nsec = erofs_build_time_nsec; + } else { + const struct erofs_inode_extended *e = &cino->extended; + + stbuf->st_mode = lcfs_u16_from_file(e->i_mode); + stbuf->st_size = lcfs_u64_from_file(e->i_size); + stbuf->st_uid = lcfs_u32_from_file(e->i_uid); + stbuf->st_gid = lcfs_u32_from_file(e->i_gid); + stbuf->st_mtim.tv_sec = lcfs_u64_from_file(e->i_mtime); + stbuf->st_mtim.tv_nsec = lcfs_u32_from_file(e->i_mtime); + stbuf->st_nlink = lcfs_u32_from_file(e->i_nlink); + } + + return 0; +} + +static void cfs_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + struct stat stbuf; + + (void)fi; + + memset(&stbuf, 0, sizeof(stbuf)); + if (cino == NULL || cfs_stat(ino, cino, &stbuf) == -1) + fuse_reply_err(req, ENOENT); + else + fuse_reply_attr(req, &stbuf, CFS_ATTR_TIMEOUT); +} + +static mode_t erofs_inode_get_mode(const erofs_inode *cino) +{ + if (erofs_inode_is_compact(cino)) { + const struct erofs_inode_compact *c = &cino->compact; + return lcfs_u16_from_file(c->i_mode); + } else { + const struct erofs_inode_extended *e = &cino->extended; + return lcfs_u16_from_file(e->i_mode); + } +} + +static void erofs_inode_get_info(const erofs_inode *cino, uint32_t *mode, + uint64_t *file_size, uint16_t *xattr_icount, + uint32_t *raw_blkaddr, size_t *isize) +{ + if (erofs_inode_is_compact(cino)) { + const struct erofs_inode_compact *c = &cino->compact; + + *mode = lcfs_u16_from_file(c->i_mode); + *file_size = lcfs_u32_from_file(c->i_size); + *xattr_icount = lcfs_u16_from_file(c->i_xattr_icount); + *raw_blkaddr = lcfs_u32_from_file(c->i_u.raw_blkaddr); + *isize = sizeof(struct erofs_inode_compact); + } else { + const struct erofs_inode_extended *e = &cino->extended; + *mode = lcfs_u16_from_file(e->i_mode); + *file_size = lcfs_u64_from_file(e->i_size); + *xattr_icount = lcfs_u16_from_file(e->i_xattr_icount); + *raw_blkaddr = lcfs_u32_from_file(e->i_u.raw_blkaddr); + *isize = sizeof(struct erofs_inode_extended); + } +} + +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +/* This is essentially strcmp() for non-null-terminated strings */ +static inline int memcmp2(const void *a, const size_t a_size, const void *b, + size_t b_size) +{ + size_t common_size = min(a_size, b_size); + int res; + + res = memcmp(a, b, common_size); + if (res != 0 || a_size == b_size) + return res; + + return a_size < b_size ? -1 : 1; +} + +static bool cfs_lookup_block(fuse_req_t req, const uint8_t *block, + size_t block_size, const char *name, int *cmp_out) +{ + const struct erofs_dirent *dirents = (struct erofs_dirent *)block; + size_t n_dirents; + size_t name_len = strlen(name); + ssize_t start_dirent, end_dirent; + int cmp = -1; + + n_dirents = lcfs_u16_from_file(dirents[0].nameoff) / + sizeof(struct erofs_dirent); + + start_dirent = 0; + end_dirent = n_dirents - 1; + while (start_dirent <= end_dirent) { + ssize_t mid_dirent = start_dirent + (end_dirent - start_dirent) / 2; + uint16_t nameoff = lcfs_u16_from_file(dirents[mid_dirent].nameoff); + const char *child_name = (const char *)(block + nameoff); + uint16_t child_name_len; + + if (mid_dirent + 1 < n_dirents) + child_name_len = + lcfs_u16_from_file(dirents[mid_dirent + 1].nameoff) - + nameoff; + else + child_name_len = strnlen(child_name, block_size - nameoff); + + cmp = memcmp2(name, name_len, child_name, child_name_len); + if (cmp == 0) { + uint64_t nid = lcfs_u64_from_file(dirents[mid_dirent].nid); + const erofs_inode *child_cino = cfs_get_erofs_inode(nid); + struct fuse_entry_param e; + + memset(&e, 0, sizeof(e)); + e.ino = cfs_ino_from_nid(nid); + e.attr_timeout = CFS_ATTR_TIMEOUT; + e.entry_timeout = CFS_ENTRY_TIMEOUT; + cfs_stat(e.ino, child_cino, &e.attr); + + fuse_reply_entry(req, &e); + + return true; + } else { + if (cmp > 0) + start_dirent = mid_dirent + 1; + else + end_dirent = mid_dirent - 1; + } + } + + if (end_dirent < 0) { + *cmp_out = -1; + } else if (start_dirent >= n_dirents) { + *cmp_out = 1; + } else { + *cmp_out = 0; /* inside the block */ + } + + return false; +} + +static void cfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + const erofs_inode *parent_cino = cfs_get_erofs_inode(parent); + uint32_t mode; + uint64_t file_size; + uint16_t xattr_icount; + uint32_t raw_blkaddr; + size_t xattr_size; + size_t isize; + uint64_t n_blocks; + uint64_t last_block; + bool tailpacked; + int start_block, end_block; + int cmp; + + if (parent_cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + erofs_inode_get_info(parent_cino, &mode, &file_size, &xattr_icount, + &raw_blkaddr, &isize); + + if ((mode & S_IFMT) != S_IFDIR) { + fuse_reply_err(req, ENOTDIR); + return; + } + + xattr_size = 0; + if (xattr_icount > 0) + xattr_size = sizeof(struct erofs_xattr_ibody_header) + + (xattr_icount - 1) * 4; + + tailpacked = erofs_inode_is_tailpacked(parent_cino); + n_blocks = round_up(file_size, EROFS_BLKSIZ) / EROFS_BLKSIZ; + last_block = tailpacked ? n_blocks - 1 : n_blocks; + + /* First read the out-of-band blocks */ + start_block = 0; + end_block = last_block - 1; + while (start_block <= end_block) { + int mid_block = start_block + (end_block - start_block) / 2; + const uint8_t *block_data = + erofs_data + ((raw_blkaddr + mid_block) * EROFS_BLKSIZ); + size_t block_size = EROFS_BLKSIZ; + int cmp; + + if (!tailpacked && mid_block + 1 == last_block) { + block_size = file_size % EROFS_BLKSIZ; + if (block_size == 0) { + block_size = EROFS_BLKSIZ; + } + } + + if (cfs_lookup_block(req, block_data, block_size, name, &cmp)) { + return; /* Found a match */ + } + + if (cmp == 0) + /* The name should have been in this block, + but wasn't */ + goto noent; + else if (cmp > 0) + start_block = mid_block + 1; + else if (mid_block > 0) + end_block = mid_block - 1; + } + + if (tailpacked && start_block > end_block) { + const uint8_t *block_data = + ((uint8_t *)parent_cino) + isize + xattr_size; + if (cfs_lookup_block(req, block_data, file_size % EROFS_BLKSIZ, + name, &cmp)) + return; + } + +noent: + fuse_reply_err(req, ENOENT); +} + +static mode_t erofs_file_type_to_mode(int file_type) +{ + switch (file_type) { + case EROFS_FT_SYMLINK: + return S_IFLNK; + case EROFS_FT_DIR: + return S_IFDIR; + case EROFS_FT_REG_FILE: + return S_IFREG; + case EROFS_FT_BLKDEV: + return S_IFBLK; + case EROFS_FT_CHRDEV: + return S_IFCHR; + case EROFS_FT_SOCK: + return S_IFSOCK; + case EROFS_FT_FIFO: + return S_IFIFO; + default: + return 0; + } +} + +struct dirbuf { + uint8_t *buf; + size_t current_size; + size_t max_size; + + off_t offset; +}; + +static bool cfs_readdir_block(fuse_req_t req, struct dirbuf *buf, + const uint8_t *block, size_t block_size, + size_t block_start, bool use_plus) +{ + const struct erofs_dirent *dirents = (struct erofs_dirent *)block; + size_t dirents_size = lcfs_u16_from_file(dirents[0].nameoff); + size_t n_dirents, i; + size_t start_dirent; + + if (dirents_size % sizeof(struct erofs_dirent) != 0) { + /* This should not happen for valid filesystems */ + return false; + } + + assert(buf->offset >= block_start); + + n_dirents = dirents_size / sizeof(struct erofs_dirent); + /* Round up to ensure we start looking at even dirent position, if the user passed some weird offset. */ + start_dirent = + (buf->offset - block_start + sizeof(struct erofs_dirent) - 1) / + sizeof(struct erofs_dirent); + if (start_dirent >= n_dirents) { + return false; + } + + buf->offset = block_start + sizeof(struct erofs_dirent) * start_dirent; + + /* Check if it outside the dirents part */ + if (buf->offset - block_start >= dirents_size) { + /* Move to next block */ + buf->offset = block_start + EROFS_BLKSIZ; + return false; + } + + for (i = start_dirent; i < n_dirents; i++) { + char name_buf[PATH_MAX]; + uint64_t nid = lcfs_u64_from_file(dirents[i].nid); + uint16_t nameoff = lcfs_u16_from_file(dirents[i].nameoff); + const char *child_name; + uint16_t child_name_len; + struct stat stbuf; + size_t res; + size_t remaining_size; + off_t next_offset; + + assert(buf->offset == block_start + i * sizeof(struct erofs_dirent)); + + /* After last dirent, we go directly to next block */ + if (i < n_dirents - 1) { + next_offset = buf->offset + sizeof(struct erofs_dirent); + } else { + next_offset = block_start + EROFS_BLKSIZ; + } + + /* Compute length of the name, which is a bit weird for the last dirent */ + child_name = (char *)(block + nameoff); + if (i + 1 < n_dirents) + child_name_len = + lcfs_u16_from_file(dirents[i + 1].nameoff) - nameoff; + else + child_name_len = strnlen(child_name, block_size - nameoff); + + /* We have to copy to be able to null terminate for fuse_add_direntry, lame */ + child_name_len = min(child_name_len, PATH_MAX - 1); + memcpy(name_buf, child_name, child_name_len); + name_buf[child_name_len] = 0; + + remaining_size = buf->max_size - buf->current_size; + if (use_plus) { + const erofs_inode *child_cino = cfs_get_erofs_inode(nid); + struct fuse_entry_param e; + + memset(&e, 0, sizeof(e)); + e.ino = cfs_ino_from_nid(nid); + e.attr_timeout = CFS_ATTR_TIMEOUT; + e.entry_timeout = CFS_ENTRY_TIMEOUT; + cfs_stat(e.ino, child_cino, &e.attr); + + res = fuse_add_direntry_plus( + req, (char *)(buf->buf + buf->current_size), + remaining_size, name_buf, &e, next_offset); + } else { + uint8_t type = dirents[i].file_type; + memset(&stbuf, 0, sizeof(stbuf)); + stbuf.st_ino = cfs_ino_from_nid(nid); + stbuf.st_mode = erofs_file_type_to_mode(type); + res = fuse_add_direntry( + req, (char *)(buf->buf + buf->current_size), + remaining_size, name_buf, &stbuf, next_offset); + } + if (res <= remaining_size) { + buf->current_size += res; + } else { + /* didn't fit, stop */ + return true; + } + buf->offset = next_offset; + } + + return false; +} + +static void _cfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t max_size, + off_t off, struct fuse_file_info *fi, bool use_plus) +{ + const erofs_inode *cino = (const erofs_inode *)(uintptr_t)fi->fh; + uint32_t mode; + uint64_t file_size; + uint16_t xattr_icount; + size_t xattr_size; + size_t isize; + uint64_t n_blocks; + uint64_t last_block; + size_t first_block; + bool tailpacked; + uint32_t raw_blkaddr; + uint8_t bufdata[max_size]; + bool done; + struct dirbuf buf = { + .buf = bufdata, + .current_size = 0, + .max_size = max_size, + .offset = off, + }; + + erofs_inode_get_info(cino, &mode, &file_size, &xattr_icount, + &raw_blkaddr, &isize); + + xattr_size = 0; + if (xattr_icount > 0) + xattr_size = sizeof(struct erofs_xattr_ibody_header) + + (xattr_icount - 1) * 4; + + tailpacked = erofs_inode_is_tailpacked(cino); + n_blocks = round_up(file_size, EROFS_BLKSIZ) / EROFS_BLKSIZ; + last_block = tailpacked ? n_blocks - 1 : n_blocks; + first_block = buf.offset / EROFS_BLKSIZ; + + if (first_block >= n_blocks) { + goto out; + } + + /* First read the out-of-band blocks */ + done = false; + for (uint64_t block = first_block; block < last_block; block++) { + size_t block_start = block * EROFS_BLKSIZ; + size_t block_size = EROFS_BLKSIZ; + + if (!tailpacked && block + 1 == last_block) { + block_size = file_size % EROFS_BLKSIZ; + if (block_size == 0) { + block_size = EROFS_BLKSIZ; + } + } + + if (buf.offset >= block_start && + buf.offset < block_start + block_size) { + const uint8_t *block_data = + erofs_data + raw_blkaddr * EROFS_BLKSIZ + block_start; + if (cfs_readdir_block(req, &buf, block_data, block_size, + block_start, use_plus)) { + done = true; + break; + } + } + } + + if (!done && tailpacked) { + size_t block_start = last_block * EROFS_BLKSIZ; + size_t block_size = file_size % EROFS_BLKSIZ; + + if (buf.offset >= block_start && + buf.offset < block_start + block_size) { + const uint8_t *block_data = + ((uint8_t *)cino) + isize + xattr_size; + cfs_readdir_block(req, &buf, block_data, block_size, + block_start, use_plus); + } + } + +out: + fuse_reply_buf(req, (char *)bufdata, buf.current_size); +} + +static void cfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t max_size, + off_t off, struct fuse_file_info *fi) +{ + _cfs_readdir(req, ino, max_size, off, fi, false); +} + +static void cfs_readdir_plus(fuse_req_t req, fuse_ino_t ino, size_t max_size, + off_t off, struct fuse_file_info *fi) +{ + _cfs_readdir(req, ino, max_size, off, fi, true); +} + +static void cfs_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + mode_t mode; + + if (cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + mode = erofs_inode_get_mode(cino); + + if ((mode & S_IFMT) != S_IFDIR) { + fuse_reply_err(req, ENOTDIR); + return; + } + + fi->keep_cache = 1; + fi->cache_readdir = 1; + fi->fh = (uintptr_t)cino; + fuse_reply_open(req, fi); +} + +static void cfs_readlink(fuse_req_t req, fuse_ino_t ino) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + uint32_t mode; + uint64_t file_size; + uint16_t xattr_icount; + uint32_t raw_blkaddr; + size_t isize; + size_t xattr_size; + bool tailpacked; + char name_buf[PATH_MAX]; + + if (cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + erofs_inode_get_info(cino, &mode, &file_size, &xattr_icount, + &raw_blkaddr, &isize); + + if ((mode & S_IFMT) != S_IFLNK) { + fuse_reply_err(req, EINVAL); + return; + } + + /* Avoid overwriting name_buf below */ + if (file_size >= PATH_MAX) { + fuse_reply_err(req, EIO); + return; + } + + xattr_size = 0; + if (xattr_icount > 0) + xattr_size = sizeof(struct erofs_xattr_ibody_header) + + (xattr_icount - 1) * 4; + + tailpacked = erofs_inode_is_tailpacked(cino); + if (!tailpacked) { + fuse_reply_err(req, EINVAL); + return; + } + + const uint8_t *inline_data = ((uint8_t *)cino) + isize + xattr_size; + + memcpy(name_buf, inline_data, file_size); + name_buf[file_size] = 0; + + fuse_reply_readlink(req, name_buf); +} + +static void cfs_init(void *userdata, struct fuse_conn_info *conn) +{ + if (conn->capable & FUSE_CAP_CACHE_SYMLINKS) + conn->want |= FUSE_CAP_CACHE_SYMLINKS; + + if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) + conn->want |= FUSE_CAP_EXPORT_SUPPORT; + + if (erofs_use_acl && conn->capable & FUSE_CAP_POSIX_ACL) + conn->want |= FUSE_CAP_POSIX_ACL; + + if (conn->capable & FUSE_CAP_SPLICE_WRITE) + conn->want |= FUSE_CAP_SPLICE_WRITE; + if (conn->capable & FUSE_CAP_SPLICE_READ) + conn->want |= FUSE_CAP_SPLICE_READ; +} + +const char *erofs_xattr_prefixes[] = { + "", + "user.", + "system.posix_acl_access", + "system.posix_acl_default", + "trusted.", + "lustre.", + "security.", +}; + +#define EROFS_N_PREFIXES (sizeof(erofs_xattr_prefixes) / sizeof(char *)) + +static bool is_acl_xattr(int prefix, const char *name, size_t name_len) +{ + const char *const nfs_acl = "system.nfs4_acl"; + + if ((prefix == EROFS_XATTR_INDEX_POSIX_ACL_ACCESS || + prefix == EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT) && + name_len == 0) + return true; + if (prefix == 0 && name_len == strlen(nfs_acl) && + memcmp(name, nfs_acl, strlen(nfs_acl)) == 0) + return true; + return false; +} + +static int erofs_get_xattr_prefix(const char *str) +{ + for (int i = 1; i < EROFS_N_PREFIXES; i++) { + const char *prefix = erofs_xattr_prefixes[i]; + if (strlen(str) >= strlen(prefix) && + memcmp(str, prefix, strlen(prefix)) == 0) { + return i; + } + } + return 0; +} + +#define OVERLAY_PREFIX "overlay." + +static int cfs_rewrite_xattr_prefix_from_image(int name_index, const char *name, + size_t name_len) +{ + /* We rewrite trusted.overlay.* to user.overlay.* */ + if (name_index == EROFS_XATTR_INDEX_TRUSTED && + name_len > strlen(OVERLAY_PREFIX) && + memcmp(name, OVERLAY_PREFIX, strlen(OVERLAY_PREFIX)) == 0) + return EROFS_XATTR_INDEX_USER; + + return name_index; +} + +static int cfs_rewrite_xattr_prefix_to_image(int name_index, const char *name, + size_t name_len) +{ + /* We rewrite trusted.overlay.* to user.overlay.* */ + if (name_index == EROFS_XATTR_INDEX_USER && + name_len > strlen(OVERLAY_PREFIX) && + memcmp(name, OVERLAY_PREFIX, strlen(OVERLAY_PREFIX)) == 0) + return EROFS_XATTR_INDEX_TRUSTED; + + return name_index; +} + +static int cfs_listxattr_element(const struct erofs_xattr_entry *entry, + char *buf, size_t *buf_size, size_t max_buf_size) +{ + const char *name = (const char *)entry + sizeof(struct erofs_xattr_entry); + uint8_t name_len = entry->e_name_len; + uint8_t name_index = entry->e_name_index; + size_t full_name_len; + const char *prefix; + + name_index = cfs_rewrite_xattr_prefix_from_image(name_index, name, name_len); + + prefix = erofs_xattr_prefixes[name_index]; + full_name_len = name_len + strlen(prefix); + + if (max_buf_size != 0 && max_buf_size - *buf_size < full_name_len + 1) + return -ERANGE; + + if (max_buf_size != 0) + memcpy(buf + *buf_size, prefix, strlen(prefix)); + *buf_size += strlen(prefix); + if (max_buf_size != 0) + memcpy(buf + *buf_size, name, name_len); + *buf_size += name_len; + if (max_buf_size != 0) + buf[*buf_size] = 0; + *buf_size += 1; + + return 0; +} + +static void cfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t max_size) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + uint32_t mode; + uint64_t file_size; + uint16_t xattr_icount; + uint32_t raw_blkaddr; + size_t isize; + size_t xattr_size; + char buf[max_size]; + size_t buf_size; + uint8_t shared_count; + const struct erofs_xattr_ibody_header *xattr_header; + const uint8_t *xattrs_inline; + const uint8_t *xattrs_start; + const uint8_t *xattrs_end; + int res; + + if (cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + erofs_inode_get_info(cino, &mode, &file_size, &xattr_icount, + &raw_blkaddr, &isize); + + if (xattr_icount == 0) { + /* No xattrs */ + + if (max_size == 0) { + fuse_reply_xattr(req, 0); + } else { + fuse_reply_buf(req, buf, 0); + } + return; + } + + xattr_size = 0; + if (xattr_icount > 0) + xattr_size = sizeof(struct erofs_xattr_ibody_header) + + (xattr_icount - 1) * 4; + + xattrs_start = ((uint8_t *)cino) + isize; + xattrs_end = ((uint8_t *)cino) + isize + xattr_size; + xattr_header = (struct erofs_xattr_ibody_header *)xattrs_start; + shared_count = xattr_header->h_shared_count; + + buf_size = 0; + xattrs_inline = xattrs_start + sizeof(struct erofs_xattr_ibody_header) + + shared_count * 4; + + /* Inline xattrs */ + while (xattrs_inline + sizeof(struct erofs_xattr_entry) < xattrs_end) { + const struct erofs_xattr_entry *entry = + (const struct erofs_xattr_entry *)xattrs_inline; + uint8_t name_len = entry->e_name_len; + uint16_t value_size = lcfs_u16_from_file(entry->e_value_size); + size_t el_size = round_up( + sizeof(struct erofs_xattr_entry) + name_len + value_size, 4); + + res = cfs_listxattr_element(entry, buf, &buf_size, max_size); + if (res < 0) { + fuse_reply_err(req, -res); + return; + } + xattrs_inline += el_size; + } + + /* Shared xattrs */ + for (int i = 0; i < shared_count; i++) { + uint32_t idx = lcfs_u32_from_file(xattr_header->h_shared_xattrs[i]); + const struct erofs_xattr_entry *entry = + (const struct erofs_xattr_entry *)(erofs_xattrdata + idx * 4); + + res = cfs_listxattr_element(entry, buf, &buf_size, max_size); + if (res < 0) { + fuse_reply_err(req, -res); + return; + } + } + + if (max_size == 0) { + fuse_reply_xattr(req, buf_size); + } else { + fuse_reply_buf(req, buf, buf_size); + } +} + +static int match_xattr_entry(const struct erofs_xattr_entry *entry, + int name_prefix, const char *name, size_t name_len) +{ + uint8_t e_name_len = entry->e_name_len; + uint8_t e_name_prefix = entry->e_name_index; + const char *e_name = (const char *)entry + sizeof(struct erofs_xattr_entry); + + return e_name_prefix == name_prefix && e_name_len == name_len && + memcmp(name, e_name, name_len) == 0; +} + +static const char *do_getxattr(const erofs_inode *cino, int name_prefix, + const char *name, uint16_t *value_size_out) +{ + size_t name_len = strlen(name); + uint32_t mode; + uint64_t file_size; + uint16_t xattr_icount; + uint32_t raw_blkaddr; + size_t isize; + size_t xattr_size; + uint8_t shared_count; + const struct erofs_xattr_ibody_header *xattr_header; + const uint8_t *xattrs_inline; + const uint8_t *xattrs_start; + const uint8_t *xattrs_end; + + erofs_inode_get_info(cino, &mode, &file_size, &xattr_icount, + &raw_blkaddr, &isize); + + if (xattr_icount == 0) { + return NULL; + } + + xattr_size = 0; + if (xattr_icount > 0) + xattr_size = sizeof(struct erofs_xattr_ibody_header) + + (xattr_icount - 1) * 4; + + xattrs_start = ((uint8_t *)cino) + isize; + xattrs_end = ((uint8_t *)cino) + isize + xattr_size; + xattr_header = (struct erofs_xattr_ibody_header *)xattrs_start; + shared_count = xattr_header->h_shared_count; + + xattrs_inline = xattrs_start + sizeof(struct erofs_xattr_ibody_header) + + shared_count * 4; + + /* Inline xattrs */ + while (xattrs_inline + sizeof(struct erofs_xattr_entry) < xattrs_end) { + const struct erofs_xattr_entry *entry = + (const struct erofs_xattr_entry *)xattrs_inline; + uint8_t e_name_len = entry->e_name_len; + uint16_t value_size = lcfs_u16_from_file(entry->e_value_size); + size_t el_size = round_up(sizeof(struct erofs_xattr_entry) + + e_name_len + value_size, + 4); + + if (match_xattr_entry(entry, name_prefix, name, name_len)) { + const char *value = (const char *)entry + + sizeof(struct erofs_xattr_entry) + + e_name_len; + *value_size_out = value_size; + return value; + } + + xattrs_inline += el_size; + } + + /* Shared xattrs */ + for (int i = 0; i < shared_count; i++) { + uint32_t idx = lcfs_u32_from_file(xattr_header->h_shared_xattrs[i]); + const struct erofs_xattr_entry *entry = + (const struct erofs_xattr_entry *)(erofs_xattrdata + idx * 4); + + if (match_xattr_entry(entry, name_prefix, name, name_len)) { + uint16_t value_size = + lcfs_u16_from_file(entry->e_value_size); + uint8_t e_name_len = entry->e_name_len; + const char *value = (const char *)entry + + sizeof(struct erofs_xattr_entry) + + e_name_len; + *value_size_out = value_size; + return value; + } + } + + return NULL; +} + +static void cfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t max_size) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + int name_prefix; + size_t name_len; + const char *value; + uint16_t value_size; + + if (cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + /* Handle prefix part */ + name_prefix = erofs_get_xattr_prefix(name); + name += strlen(erofs_xattr_prefixes[name_prefix]); + name_len = strlen(name); + + name_prefix = cfs_rewrite_xattr_prefix_to_image(name_prefix, name, name_len); + + /* When acls are not used, send EOPTNOTSUPP, as this informs + userspace to stop constantly looking for acls */ + if (!erofs_use_acl && is_acl_xattr(name_prefix, name, name_len)) { + fuse_reply_err(req, EOPNOTSUPP); + return; + } + + value = do_getxattr(cino, name_prefix, name, &value_size); + if (value == NULL) { + fuse_reply_err(req, ENODATA); + return; + } + + if (max_size == 0) { + fuse_reply_xattr(req, value_size); + } else if (max_size < value_size) { + fuse_reply_err(req, ERANGE); + } else { + fuse_reply_buf(req, (const char *)value, value_size); + } +} + +static void cfs_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +{ + const erofs_inode *cino = cfs_get_erofs_inode(ino); + int fd; + const char *redirect; + uint16_t value_size; + + if ((fi->flags & O_ACCMODE) == O_WRONLY || (fi->flags & O_ACCMODE) == O_RDWR) + return (void)fuse_reply_err(req, EROFS); + + if (cino == NULL) { + fuse_reply_err(req, ENOENT); + return; + } + + redirect = do_getxattr(cino, EROFS_XATTR_INDEX_TRUSTED, + "overlay.redirect", &value_size); + + if (redirect == NULL) { + /* Empty files have no redirect */ + fd = -1; + } else { + while (*redirect == '/') + redirect++; + + fd = openat(basedir_fd, redirect, + O_CLOEXEC | O_NOCTTY | O_NOFOLLOW | O_RDONLY, 0); + if (fd < 0) { + fuse_reply_err(req, -errno); + return; + } + + /* TODO: Verify fs-verity */ + } + + fi->fh = fd; + fi->keep_cache = 1; + + return (void)fuse_reply_open(req, fi); +} + +static void cfs_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +{ + int fd = fi->fh; + + if (fd >= 0) + close(fd); + fuse_reply_err(req, 0); +} + +static void cfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + int fd = fi->fh; + char c; + + if (fd < 0) { + c = 0; + fuse_reply_buf(req, &c, 0); + } else { + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + buf.buf[0].fd = fd; + buf.buf[0].pos = offset; + + fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); + } +} + +static void cfs_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + struct fuse_file_info *fi) +{ + int fd = fi->fh; + off_t res; + + (void)ino; + + if (fd < 0) { + if (off > 0 || (whence == SEEK_DATA)) { + fuse_reply_err(req, ENXIO); + } else { + fuse_reply_lseek(req, 0); + } + return; + } + + res = lseek(fd, off, whence); + if (res != -1) + fuse_reply_lseek(req, res); + else + fuse_reply_err(req, errno); +} + +static const struct fuse_lowlevel_ops cfs_oper = { + .init = cfs_init, + .lookup = cfs_lookup, + .getattr = cfs_getattr, + .opendir = cfs_opendir, + .readdir = cfs_readdir, + .readdirplus = cfs_readdir_plus, + .readlink = cfs_readlink, + .listxattr = cfs_listxattr, + .getxattr = cfs_getxattr, + .open = cfs_open, + .release = cfs_release, + .read = cfs_read, + .lseek = cfs_lseek, +}; + +int main(int argc, char *argv[]) +{ + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct fuse_session *se; + struct fuse_cmdline_opts opts; + struct fuse_loop_config config; + struct cfs_data data = { .source = NULL }; + int fd; + struct stat s; + int r; + uint32_t cfs_flags; + + int ret = -1; + + if (fuse_parse_cmdline(&args, &opts) != 0) + return 1; + if (opts.show_help) { + printf("usage: %s [options] \n\n", argv[0]); + fuse_cmdline_help(); + fuse_lowlevel_help(); + ret = 0; + goto err_out1; + } else if (opts.show_version) { + printf("FUSE library version %s\n", fuse_pkgversion()); + fuse_lowlevel_version(); + ret = 0; + goto err_out1; + } + + if (opts.mountpoint == NULL) { + printf("usage: %s [options] \n", argv[0]); + printf(" %s --help\n", argv[0]); + ret = 1; + goto err_out1; + } + + /* We always want the kernel to handle the permissions */ + fuse_opt_add_arg(&args, "-o"); + fuse_opt_add_arg(&args, "ro,default_permissions"); + + if (fuse_opt_parse(&args, &data, cfs_opts, NULL) == -1) + return 1; + + fd = open(data.source, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + printexit("Failed to open %s\n", data.source); + } + + r = fstat(fd, &s); + if (r < 0) { + printexit("Failed to stat %s\n", data.source); + } + erofs_data_size = s.st_size; + + if (erofs_data_size < EROFS_BLKSIZ) { + printexit("To small image\n"); + } + + /* Memory-map the file. */ + erofs_data = mmap(0, erofs_data_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (erofs_data == MAP_FAILED) { + printexit("Failed to mmap %s\n", argv[1]); + } + close(fd); + + basedir_fd = open(data.basedir, O_RDONLY | O_PATH); + if (basedir_fd < 0) { + printexit("Failed to open basedir %s\n", data.basedir); + } + + cfs_header = (struct lcfs_erofs_header_s *)(erofs_data); + if (lcfs_u32_from_file(cfs_header->magic) != LCFS_EROFS_MAGIC) { + printexit("Wrong cfs magic"); + } + + cfs_flags = lcfs_u32_from_file(cfs_header->flags); + if (cfs_flags & LCFS_EROFS_FLAGS_HAS_ACL && !data.noacl) + erofs_use_acl = true; + + erofs_super = (struct erofs_super_block *)(erofs_data + EROFS_SUPER_OFFSET); + + if (lcfs_u32_from_file(erofs_super->magic) != EROFS_SUPER_MAGIC_V1) { + printexit("Wrong erofs magic"); + } + + erofs_metadata = erofs_data + lcfs_u32_from_file(erofs_super->meta_blkaddr) * + EROFS_BLKSIZ; + erofs_xattrdata = + erofs_data + + lcfs_u32_from_file(erofs_super->xattr_blkaddr) * EROFS_BLKSIZ; + + erofs_root_nid = lcfs_u16_from_file(erofs_super->root_nid); + erofs_build_time = lcfs_u64_from_file(erofs_super->build_time); + erofs_build_time_nsec = lcfs_u32_from_file(erofs_super->build_time_nsec); + + se = fuse_session_new(&args, &cfs_oper, sizeof(cfs_oper), NULL); + if (se == NULL) + goto err_out1; + + if (fuse_set_signal_handlers(se) != 0) + goto err_out2; + + if (fuse_session_mount(se, opts.mountpoint) != 0) + goto err_out3; + + fuse_daemonize(opts.foreground); + + /* Block until ctrl+c or fusermount -u */ + if (opts.singlethread) + ret = fuse_session_loop(se); + else { + config.clone_fd = true; + config.max_idle_threads = opts.max_idle_threads; + ret = fuse_session_loop_mt(se, &config); + } + + fuse_session_unmount(se); +err_out3: + fuse_remove_signal_handlers(se); +err_out2: + fuse_session_destroy(se); +err_out1: + free(opts.mountpoint); + fuse_opt_free_args(&args); + + return ret ? 1 : 0; +}