diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index df210b28..42211d89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,12 +32,14 @@ jobs: options: "--privileged --pid=host -v /var/tmp:/var/tmp --tmpfs /tmp:rw,exec,nosuid,nodev -v /:/run/host" steps: - - run: dnf -y install cargo clippy composefs-devel e2fsprogs just ostree rustfmt gcc-c++ + - run: dnf -y install cargo clippy composefs-devel e2fsprogs fsverity-utils just ostree rustfmt gcc-c++ - name: Enable fs-verity on / run: tune2fs -O verity $(findmnt -vno SOURCE /run/host) - uses: actions/checkout@v7 - name: Run all checks (clippy, fmt, feature combos, tests) run: env CFS_TEST_TMPDIR=/run/host/var/tmp just check + - name: Run mount.composefs tests + run: env TMPDIR=/run/host/var/tmp just test-mount-composefs # Fast smoke test — catches basic breakage before spending time on # container builds and VM boots. Runs only the unprivileged tests @@ -88,6 +90,20 @@ jobs: crates/composefs/fuzz/artifacts/ target/fuzz-logs/ + # C API compatibility: builds the Rust cdylib, then runs the C + # composefs test suite (test-checksums, test-units) against it. + capi: + name: C API compatibility + needs: smoke + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v7 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - uses: extractions/setup-just@v4 + - name: Run C test suite against Rust libcomposefs + run: just test-capi + # Full integration tests: builds a bootc container image, runs all # tests (both unprivileged and privileged). Privileged tests execute # inside bcvk ephemeral VMs booted from the container image. @@ -222,7 +238,7 @@ jobs: # repository settings as the single required status check. 
required-checks: if: always() - needs: [nightly, fedora, smoke, fuzz, integration, examples] + needs: [nightly, fedora, smoke, fuzz, capi, integration, examples] runs-on: ubuntu-latest steps: - run: exit 1 @@ -231,5 +247,6 @@ jobs: needs.fedora.result != 'success' || needs.smoke.result != 'success' || needs.fuzz.result != 'success' || + needs.capi.result != 'success' || needs.integration.result != 'success' || needs.examples.result != 'success' diff --git a/Justfile b/Justfile index 90991eeb..e9a3d3f5 100644 --- a/Justfile +++ b/Justfile @@ -72,6 +72,10 @@ test-integration *ARGS: build cargo test -p composefs-integration-tests --test cfsctl-integration-tests -- --skip privileged_ {{ ARGS }} fi +# Run mount.composefs shell tests (needs fsverity-utils; mount tests need root) +test-mount-composefs: build + crates/composefs-ctl/tests/test-mount-composefs.sh $(pwd)/target/debug/cfsctl + # Build the test container image for VM-based integration tests _integration-container-build: podman build --build-arg base_image={{base_image}} --build-arg cfsctl_features={{cfsctl_features}} -t {{_test_image}} . @@ -141,6 +145,11 @@ generate-corpus: fuzz-list: cd crates/composefs && cargo +nightly fuzz list +# Test composefs-capi against the C composefs test suite in a container. +# Optionally pass the path to a local checkout of the C composefs repo. 
+test-capi *ARGS: + crates/composefs-capi/tests/test-capi-container.sh {{ARGS}} + # Clean build artifacts clean: cargo clean diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..bed4f80e --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +PREFIX ?= /usr/local +DESTDIR ?= +LIBDIR ?= $(PREFIX)/lib +SBINDIR ?= $(PREFIX)/sbin +BINDIR ?= $(PREFIX)/bin +MANDIR ?= $(PREFIX)/share/man + +.PHONY: build-capi install-capi build-ctl install-ctl install-man + +build-capi: + cargo build --release -p composefs-capi + +install-capi: build-capi + LIBDIR=$(DESTDIR)$(LIBDIR) crates/composefs-capi/install.sh $(DESTDIR)$(PREFIX) + +build-ctl: + cargo build --release -p composefs-ctl + +install-ctl: build-ctl + install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(SBINDIR) + install -m 755 target/release/cfsctl $(DESTDIR)$(BINDIR)/cfsctl + ln -sf $(BINDIR)/cfsctl $(DESTDIR)$(SBINDIR)/mount.composefs + ln -sf $(BINDIR)/cfsctl $(DESTDIR)$(BINDIR)/mkcomposefs + ln -sf $(BINDIR)/cfsctl $(DESTDIR)$(BINDIR)/composefs-info + +install-man: + install -d $(DESTDIR)$(MANDIR)/man1 $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8 + for md in man/*.md; do \ + base=$$(basename "$$md" .md); \ + case "$$base" in \ + mount.composefs) section=8 ;; \ + composefs-dump) section=5 ;; \ + *) section=1 ;; \ + esac; \ + pandoc -s -t man "$$md" -o $(DESTDIR)$(MANDIR)/man$$section/$$base.$$section; \ + done diff --git a/crates/composefs-boot/src/selabel.rs b/crates/composefs-boot/src/selabel.rs index 5ec10aee..7bd24ef6 100644 --- a/crates/composefs-boot/src/selabel.rs +++ b/crates/composefs-boot/src/selabel.rs @@ -116,7 +116,10 @@ pub fn open_file( match dir.get_file_opt(filename.as_ref())? { Some(file) => match file { RegularFile::Inline(data) => Ok(Some(Box::new(Cursor::new(data.clone())))), - RegularFile::External(id, ..) => Ok(Some(Box::new(File::from(repo.open_object(id)?)))), + RegularFile::External(id, ..) | RegularFile::ExternalNoVerity(id, ..) 
=> { + Ok(Some(Box::new(File::from(repo.open_object(id)?)))) + } + RegularFile::Sparse(..) => Ok(None), }, None => Ok(None), } diff --git a/crates/composefs-capi/Cargo.toml b/crates/composefs-capi/Cargo.toml new file mode 100644 index 00000000..68a2da3f --- /dev/null +++ b/crates/composefs-capi/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "composefs-capi" +version = "1.4.0" +edition.workspace = true +license.workspace = true +publish = false +description = "C-compatible shared library (libcomposefs) backed by Rust composefs implementation" + +[lib] +crate-type = ["cdylib", "staticlib"] + +[dependencies] +composefs = { workspace = true } +libc = "0.2" +anyhow = "1" +rustix = { version = "1", features = ["fs", "mm", "process", "mount"] } +zerocopy = "0.8" + +[build-dependencies] +cc = "1" + +[lints.rust] +unsafe_code = "allow" +missing_docs = "allow" +missing_debug_implementations = "allow" diff --git a/crates/composefs-capi/build.rs b/crates/composefs-capi/build.rs new file mode 100644 index 00000000..a399b0e2 --- /dev/null +++ b/crates/composefs-capi/build.rs @@ -0,0 +1,9 @@ +fn main() { + println!("cargo:rustc-cdylib-link-arg=-Wl,-soname,libcomposefs.so.1"); + + cc::Build::new() + .file("tests/test_lcfs.c") + .include("include/libcomposefs") + .warnings(false) + .compile("test_lcfs_c"); +} diff --git a/crates/composefs-capi/composefs.pc.in b/crates/composefs-capi/composefs.pc.in new file mode 100644 index 00000000..1eb33e86 --- /dev/null +++ b/crates/composefs-capi/composefs.pc.in @@ -0,0 +1,10 @@ +prefix=@PREFIX@ +exec_prefix=${prefix} +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: Composefs +Description: library for generating and using composefs images +Version: @VERSION@ +Libs: -L${libdir} -lcomposefs +Cflags: -I${includedir} diff --git a/crates/composefs-capi/include/libcomposefs/lcfs-erofs.h b/crates/composefs-capi/include/libcomposefs/lcfs-erofs.h new file mode 100644 index 00000000..387b558f --- /dev/null +++ 
b/crates/composefs-capi/include/libcomposefs/lcfs-erofs.h @@ -0,0 +1,26 @@ +/* lcfs + Copyright (C) 2023 Alexander Larsson + + SPDX-License-Identifier: GPL-2.0-or-later OR Apache-2.0 +*/ +#ifndef _LCFS_EROFS_H +#define _LCFS_EROFS_H + +#include + +#define LCFS_EROFS_VERSION 1 +#define LCFS_EROFS_MAGIC 0xd078629aU + +typedef enum { + LCFS_EROFS_FLAGS_HAS_ACL = (1 << 0), +} lcfs_erofs_flag_t; + +struct lcfs_erofs_header_s { + uint32_t magic; + uint32_t version; + uint32_t flags; + uint32_t composefs_version; + uint32_t unused[4]; +} __attribute__((__packed__)); + +#endif diff --git a/crates/composefs-capi/include/libcomposefs/lcfs-mount.h b/crates/composefs-capi/include/libcomposefs/lcfs-mount.h new file mode 100644 index 00000000..06cd3b6e --- /dev/null +++ b/crates/composefs-capi/include/libcomposefs/lcfs-mount.h @@ -0,0 +1,52 @@ +/* lcfs + Copyright (C) 2023 Alexander Larsson + + SPDX-License-Identifier: GPL-2.0-or-later OR Apache-2.0 +*/ +#ifndef _LCFS_MOUNT_H +#define _LCFS_MOUNT_H + +#include +#include +#include +#include +#include + +#ifndef LCFS_EXTERN +#define LCFS_EXTERN extern +#endif + +#define ENOVERITY ENOTTY +#define EWRONGVERITY EILSEQ +#define ENOSIGNATURE EBADMSG + +enum lcfs_mount_flags_t { + LCFS_MOUNT_FLAGS_NONE = 0, + LCFS_MOUNT_FLAGS_REQUIRE_VERITY = (1 << 0), + LCFS_MOUNT_FLAGS_READONLY = (1 << 1), + LCFS_MOUNT_FLAGS_IDMAP = (1 << 3), + LCFS_MOUNT_FLAGS_TRY_VERITY = (1 << 4), + + LCFS_MOUNT_FLAGS_MASK = (1 << 5) - 1, +}; + +struct lcfs_mount_options_s { + const char **objdirs; + size_t n_objdirs; + const char *workdir; + const char *upperdir; + const char *expected_fsverity_digest; + uint32_t flags; + int idmap_fd; /* userns fd */ + const char *image_mountdir; /* Temporary location to mount images if needed */ + + uint32_t reserved[4]; + void *reserved2[4]; +}; + +LCFS_EXTERN int lcfs_mount_image(const char *path, const char *mountpoint, + struct lcfs_mount_options_s *options); +LCFS_EXTERN int lcfs_mount_fd(int fd, const char *mountpoint, + 
struct lcfs_mount_options_s *options); + +#endif diff --git a/crates/composefs-capi/include/libcomposefs/lcfs-writer.h b/crates/composefs-capi/include/libcomposefs/lcfs-writer.h new file mode 100644 index 00000000..a68564f6 --- /dev/null +++ b/crates/composefs-capi/include/libcomposefs/lcfs-writer.h @@ -0,0 +1,192 @@ +/* lcfs + Copyright (C) 2021 Giuseppe Scrivano + + SPDX-License-Identifier: GPL-2.0-or-later OR Apache-2.0 +*/ +#ifndef _LCFS_OPS_H +#define _LCFS_OPS_H + +#include +#include +#include +#include +#include + +#ifndef LCFS_EXTERN +#define LCFS_EXTERN extern +#endif + +#define LCFS_DIGEST_SIZE 32 + +enum { + LCFS_BUILD_SKIP_XATTRS = (1 << 0), + LCFS_BUILD_USE_EPOCH = (1 << 1), + LCFS_BUILD_SKIP_DEVICES = (1 << 2), + LCFS_BUILD_COMPUTE_DIGEST = (1 << 3), /* Store expected fs-verity digest */ + LCFS_BUILD_NO_INLINE = (1 << 4), + LCFS_BUILD_USER_XATTRS = (1 << 5), /* Only read user.* xattrs */ + LCFS_BUILD_BY_DIGEST = (1 << 6), /* Refer to basedir files by fs-verity digest */ +}; + +enum lcfs_format_t { + LCFS_FORMAT_EROFS, +}; + +enum lcfs_flags_t { + LCFS_FLAGS_NONE = 0, + LCFS_FLAGS_MASK = 0, +}; + +#define LCFS_VERSION_MAX 1 +/* Version history: + * 0 - Initial version + * 1 - Mark xwhitouts using the opaque=x format (1.0.3) + */ + +/* Default value used by tooling, update with care */ +#define LCFS_DEFAULT_VERSION_MIN 0 +#define LCFS_DEFAULT_VERSION_MAX 1 + +// The extra space required for metadata per xattr. +// Should match sizeof(struct erofs_xattr_entry) +#define LCFS_INODE_XATTRMETA_SIZE 4 +// Maximum size of key+value data (excluding trailing NUL for key) +// that can be allocated to an individual inode from external input. +// The EROFS limits this to basically UINT16_MAX - +// space for our internal xattrs. Out of conservatism we reserve +// fully half the xattr storage. +// +#define LCFS_INODE_EXTERNAL_XATTR_MAX (UINT16_MAX / 2) + +// The maximum size in bytes of file content which can be inlined +// into a composefs. 
It's generally recommended to stay far below +// this - use LCFS_RECOMMENDED_INLINE_CONTENT_MAX. +#define LCFS_INLINE_CONTENT_MAX 5000 +// The maximum recommended size for content to be inlined; +// We pick 64 which is the size of a sha256 digest that would otherwise be used as a redirect +// xattr, so the inlined file is smaller. +#define LCFS_RECOMMENDED_INLINE_CONTENT_MAX 64 + +typedef ssize_t (*lcfs_read_cb)(void *file, void *buf, size_t count); +typedef ssize_t (*lcfs_write_cb)(void *file, void *buf, size_t count); + +struct lcfs_write_options_s { + uint32_t format; + uint32_t version; + uint32_t flags; + uint8_t *digest_out; + void *file; + lcfs_write_cb file_write_cb; + uint32_t max_version; + uint32_t reserved[3]; + void *reserved2[4]; +}; + +LCFS_EXTERN struct lcfs_node_s *lcfs_node_new(void); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_ref(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_unref(struct lcfs_node_s *node); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_clone(struct lcfs_node_s *node); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_clone_deep(struct lcfs_node_s *node); +LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_file(int dirfd, const char *fname, + int buildflags); +LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, + size_t image_data_size); +struct lcfs_read_options_s { + // If non-NULL, this is a NULL terminated array of filenames; only entries + // for these files will be loaded. At the current time only filenames (not full paths) + // are supported. 
+ const char *const *toplevel_entries; + uint32_t reserved[3]; + void *reserved2[4]; +}; +LCFS_EXTERN struct lcfs_node_s * +lcfs_load_node_from_image_ext(const uint8_t *image_data, size_t image_data_size, + const struct lcfs_read_options_s *opts); +LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_fd(int fd); +LCFS_EXTERN struct lcfs_node_s * +lcfs_load_node_from_fd_ext(int fd, const struct lcfs_read_options_s *opts); +LCFS_EXTERN int lcfs_version_from_fd(int fd); + +LCFS_EXTERN const char *lcfs_node_get_xattr(struct lcfs_node_s *node, + const char *name, size_t *length); +LCFS_EXTERN int lcfs_node_set_xattr(struct lcfs_node_s *node, const char *name, + const char *value, size_t value_len); +LCFS_EXTERN int lcfs_node_unset_xattr(struct lcfs_node_s *node, const char *name); +LCFS_EXTERN size_t lcfs_node_get_n_xattr(struct lcfs_node_s *node); +LCFS_EXTERN const char *lcfs_node_get_xattr_name(struct lcfs_node_s *node, + size_t index); + +LCFS_EXTERN int lcfs_node_set_payload(struct lcfs_node_s *node, const char *payload); +LCFS_EXTERN int lcfs_node_set_symlink_payload(struct lcfs_node_s *node, + const char *payload); +LCFS_EXTERN const char *lcfs_node_get_payload(struct lcfs_node_s *node); + +LCFS_EXTERN int lcfs_node_set_content(struct lcfs_node_s *node, + const uint8_t *data, size_t data_size); +LCFS_EXTERN const uint8_t *lcfs_node_get_content(struct lcfs_node_s *node); + +LCFS_EXTERN struct lcfs_node_s *lcfs_node_lookup_child(struct lcfs_node_s *node, + const char *name); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_get_parent(struct lcfs_node_s *node); +LCFS_EXTERN int lcfs_node_add_child(struct lcfs_node_s *parent, + struct lcfs_node_s *child, /* Takes ownership on success */ + const char *name); +LCFS_EXTERN const char *lcfs_node_get_name(struct lcfs_node_s *node); +LCFS_EXTERN size_t lcfs_node_get_n_children(struct lcfs_node_s *node); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_get_child(struct lcfs_node_s *node, + size_t i); +LCFS_EXTERN void 
lcfs_node_make_hardlink(struct lcfs_node_s *node, + struct lcfs_node_s *target); +LCFS_EXTERN struct lcfs_node_s *lcfs_node_get_hardlink_target(struct lcfs_node_s *node); + +LCFS_EXTERN bool lcfs_node_dirp(struct lcfs_node_s *node); +LCFS_EXTERN uint32_t lcfs_node_get_mode(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_mode(struct lcfs_node_s *node, uint32_t mode); +LCFS_EXTERN int lcfs_node_try_set_mode(struct lcfs_node_s *node, uint32_t mode); +LCFS_EXTERN uint32_t lcfs_node_get_uid(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_uid(struct lcfs_node_s *node, uint32_t uid); +LCFS_EXTERN uint32_t lcfs_node_get_gid(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_gid(struct lcfs_node_s *node, uint32_t gid); +LCFS_EXTERN uint32_t lcfs_node_get_rdev(struct lcfs_node_s *node) + __attribute__((deprecated)); +LCFS_EXTERN uint64_t lcfs_node_get_rdev64(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_rdev(struct lcfs_node_s *node, uint32_t rdev) + __attribute__((deprecated)); +LCFS_EXTERN void lcfs_node_set_rdev64(struct lcfs_node_s *node, uint64_t rdev); +LCFS_EXTERN uint32_t lcfs_node_get_nlink(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_nlink(struct lcfs_node_s *node, uint32_t nlink); +LCFS_EXTERN uint64_t lcfs_node_get_size(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_size(struct lcfs_node_s *node, uint64_t size); +LCFS_EXTERN void lcfs_node_set_mtime(struct lcfs_node_s *node, struct timespec *time); +LCFS_EXTERN void lcfs_node_get_mtime(struct lcfs_node_s *node, struct timespec *time); + +LCFS_EXTERN const uint8_t *lcfs_node_get_fsverity_digest(struct lcfs_node_s *node); +LCFS_EXTERN void lcfs_node_set_fsverity_digest(struct lcfs_node_s *node, + uint8_t digest[LCFS_DIGEST_SIZE]); + +LCFS_EXTERN int lcfs_node_set_fsverity_from_content(struct lcfs_node_s *node, + void *file, + lcfs_read_cb read_cb); + +LCFS_EXTERN int lcfs_node_set_fsverity_from_fd(struct lcfs_node_s *node, int fd); + +LCFS_EXTERN 
struct lcfs_node_s *lcfs_build(int dirfd, const char *fname, + int buildflags, char **failed_path_out); + +LCFS_EXTERN int lcfs_write_to(struct lcfs_node_s *root, + struct lcfs_write_options_s *options); + +/* fsverity helpers */ +LCFS_EXTERN int lcfs_compute_fsverity_from_content(uint8_t *digest, void *file, + lcfs_read_cb read_cb); +LCFS_EXTERN int lcfs_compute_fsverity_from_fd(uint8_t *digest, int fd); +LCFS_EXTERN int lcfs_compute_fsverity_from_data(uint8_t *digest, uint8_t *data, + size_t data_len); +LCFS_EXTERN int lcfs_fd_measure_fsverity(uint8_t *digest, int fd); +LCFS_EXTERN int lcfs_fd_get_fsverity(uint8_t *digest, int fd); + +LCFS_EXTERN int lcfs_node_set_from_content(struct lcfs_node_s *node, int dirfd, + const char *fname, int buildflags); +LCFS_EXTERN int lcfs_fd_enable_fsverity(int fd); + +#endif diff --git a/crates/composefs-capi/include/libcomposefs/private/erofs_fs.h b/crates/composefs-capi/include/libcomposefs/private/erofs_fs.h new file mode 100644 index 00000000..a03ec70b --- /dev/null +++ b/crates/composefs-capi/include/libcomposefs/private/erofs_fs.h @@ -0,0 +1,461 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */ +/* + * EROFS (Enhanced ROM File System) on-disk format definition + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud + */ +#ifndef __EROFS_FS_H +#define __EROFS_FS_H + +#define EROFS_SUPER_OFFSET 1024 + +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 +#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 + +/* + * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should + * be incompatible with this kernel version. 
+ */ +#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001 +#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 +#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 +#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 +#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 +#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 +#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 +#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 +#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 +#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 +#define EROFS_ALL_FEATURE_INCOMPAT \ + (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ + EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ + EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ + EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ + EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ + EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ + EROFS_FEATURE_INCOMPAT_DEDUPE | \ + EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES) + +#define EROFS_SB_EXTSLOT_SIZE 16 + +struct erofs_deviceslot { + u8 tag[64]; /* digest(sha256), etc. 
*/ + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 reserved[56]; +}; +#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) + +/* erofs on-disk super block (currently 128 bytes) */ +struct erofs_super_block { + __le32 magic; /* file system magic number */ + __le32 checksum; /* crc32c(super_block) */ + __le32 feature_compat; + __u8 blkszbits; /* filesystem block size in bit shift */ + __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ + + __le16 root_nid; /* nid of root directory */ + __le64 inos; /* total valid ino # (== f_files - f_favail) */ + + __le64 build_time; /* compact inode time derivation */ + __le32 build_time_nsec; /* compact inode time derivation in ns scale */ + __le32 blocks; /* used for statfs */ + __le32 meta_blkaddr; /* start block address of metadata area */ + __le32 xattr_blkaddr; /* start block address of shared xattr area */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __u8 volume_name[16]; /* volume name */ + __le32 feature_incompat; + union { + /* bitmap for available compression algorithms */ + __le16 available_compr_algs; + /* customized sliding window size instead of 64k by default */ + __le16 lz4_max_distance; + } __packed u1; + __le16 extra_devices; /* # of devices besides the primary device */ + __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ + __u8 dirblkbits; /* directory block size in bit shift */ + __u8 xattr_prefix_count; /* # of long xattr name prefixes */ + __le32 xattr_prefix_start; /* start of long xattr prefixes */ + __le64 packed_nid; /* nid of the special packed inode */ + __u8 xattr_filter_reserved; /* reserved for xattr name filter */ + __u8 reserved2[23]; +}; + +/* + * EROFS inode datalayout (i_format in on-disk inode): + * 0 - uncompressed flat inode without tail-packing inline data: + * 1 - compressed inode with non-compact indexes: + * 2 - uncompressed flat inode with tail-packing inline data: + * 3 - 
compressed inode with compact indexes: + * 4 - chunk-based inode with (optional) multi-device support: + * 5~7 - reserved + */ +enum { + EROFS_INODE_FLAT_PLAIN = 0, + EROFS_INODE_COMPRESSED_FULL = 1, + EROFS_INODE_FLAT_INLINE = 2, + EROFS_INODE_COMPRESSED_COMPACT = 3, + EROFS_INODE_CHUNK_BASED = 4, + EROFS_INODE_DATALAYOUT_MAX +}; + +static inline bool erofs_inode_is_data_compressed(unsigned int datamode) +{ + return datamode == EROFS_INODE_COMPRESSED_COMPACT || + datamode == EROFS_INODE_COMPRESSED_FULL; +} + +/* bit definitions of inode i_format */ +#define EROFS_I_VERSION_MASK 0x01 +#define EROFS_I_DATALAYOUT_MASK 0x07 + +#define EROFS_I_VERSION_BIT 0 +#define EROFS_I_DATALAYOUT_BIT 1 +#define EROFS_I_ALL_BIT 4 + +#define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1) + +/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ +#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F +/* with chunk indexes or just a 4-byte blkaddr array */ +#define EROFS_CHUNK_FORMAT_INDEXES 0x0020 + +#define EROFS_CHUNK_FORMAT_ALL \ + (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) + +/* 32-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_COMPACT 0 +/* 64-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_EXTENDED 1 + +struct erofs_inode_chunk_info { + __le16 format; /* chunk blkbits, etc. 
*/ + __le16 reserved; +}; + +union erofs_inode_i_u { + /* total compressed blocks for compressed inodes */ + __le32 compressed_blocks; + + /* block address for uncompressed flat inodes */ + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; +}; + +/* 32-byte reduced form of an ondisk inode */ +struct erofs_inode_compact { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_nlink; + __le32 i_size; + __le32 i_reserved; + union erofs_inode_i_u i_u; + + __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le16 i_uid; + __le16 i_gid; + __le32 i_reserved2; +}; + +/* 64-byte complete form of an ondisk inode */ +struct erofs_inode_extended { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_reserved; + __le64 i_size; + union erofs_inode_i_u i_u; + + __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le32 i_uid; + __le32 i_gid; + __le64 i_mtime; + __le32 i_mtime_nsec; + __le32 i_nlink; + __u8 i_reserved2[16]; +}; + +/* + * inline xattrs (n == i_xattr_icount): + * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes + * 12 bytes / \ + * / \ + * /-----------------------\ + * | erofs_xattr_entries+ | + * +-----------------------+ + * inline xattrs must starts in erofs_xattr_ibody_header, + * for read-only fs, no need to introduce h_refcount + */ +struct erofs_xattr_ibody_header { + __le32 h_name_filter; /* bit value 1 indicates not-present */ + __u8 h_shared_count; + __u8 h_reserved2[7]; + __le32 h_shared_xattrs[]; /* shared xattr id array */ +}; + +/* Name indexes */ +#define EROFS_XATTR_INDEX_USER 1 +#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 
+#define EROFS_XATTR_INDEX_TRUSTED 4 +#define EROFS_XATTR_INDEX_LUSTRE 5 +#define EROFS_XATTR_INDEX_SECURITY 6 + +/* + * bit 7 of e_name_index is set when it refers to a long xattr name prefix, + * while the remained lower bits represent the index of the prefix. + */ +#define EROFS_XATTR_LONG_PREFIX 0x80 +#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f + +#define EROFS_XATTR_FILTER_BITS 32 +#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX +#define EROFS_XATTR_FILTER_SEED 0x25BBE08F + +/* xattr entry (for both inline & shared xattrs) */ +struct erofs_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_size; /* size of attribute value */ + /* followed by e_name and e_value */ + char e_name[]; /* attribute name */ +}; + +/* long xattr name prefix */ +struct erofs_xattr_long_prefix { + __u8 base_index; /* short xattr name prefix index */ + char infix[]; /* infix apart from short prefix */ +}; + +static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) +{ + if (!i_xattr_icount) + return 0; + + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); +} + +#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) + +static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) +{ + return EROFS_XATTR_ALIGN(sizeof(struct erofs_xattr_entry) + + e->e_name_len + le16_to_cpu(e->e_value_size)); +} + +/* represent a zeroed chunk (hole) */ +#define EROFS_NULL_ADDR -1 + +/* 4-byte block address array */ +#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) + +/* 8-byte inode chunk indexes */ +struct erofs_inode_chunk_index { + __le16 advise; /* always 0, don't care for now */ + __le16 device_id; /* back-end storage id (with bits masked) */ + __le32 blkaddr; /* start block address of this inode chunk */ +}; + +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* node number 
*/ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ +} __packed; + +/* + * EROFS file types should match generic FT_* types and + * it seems no need to add BUILD_BUG_ONs since potential + * unmatchness will break other fses as well... + */ + +#define EROFS_NAME_LEN 255 + +/* maximum supported size of a physical compression cluster */ +#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) + +/* available compression algorithm types (for h_algorithmtype) */ +enum { + Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_LZMA = 1, + Z_EROFS_COMPRESSION_DEFLATE = 2, + Z_EROFS_COMPRESSION_MAX +}; +#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lz4_cfgs { + __le16 max_distance; + __le16 max_pclusterblks; + u8 reserved[10]; +} __packed; + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lzma_cfgs { + __le32 dict_size; + __le16 format; + u8 reserved[8]; +} __packed; + +#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) + +/* 6 bytes (+ length field = 8 bytes) */ +struct z_erofs_deflate_cfgs { + u8 windowbits; /* 8..15 for DEFLATE */ + u8 reserved[5]; +} __packed; + +/* + * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) + * e.g. for 4k logical cluster size, 4B if compacted 2B is off; + * (4B) + 2B + (4B) if compacted 2B is on. 
+ * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) + * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) + * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) + * bit 4 : interlaced plain pcluster (0 - off; 1 - on) + * bit 5 : fragment pcluster (0 - off; 1 - on) + */ +#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 +#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 +#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 +#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 + +#define Z_EROFS_FRAGMENT_INODE_BIT 7 +struct z_erofs_map_header { + union { + /* fragment data offset in the packed inode */ + __le32 h_fragmentoff; + struct { + __le16 h_reserved1; + /* indicates the encoded size of tailpacking data */ + __le16 h_idata_size; + }; + }; + __le16 h_advise; + /* + * bit 0-3 : algorithm type of head 1 (logical cluster type 01); + * bit 4-7 : algorithm type of head 2 (logical cluster type 11). + */ + __u8 h_algorithmtype; + /* + * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; + * bit 3-6 : reserved; + * bit 7 : move the whole file into packed inode or not. 
+ */ + __u8 h_clusterbits; +}; + +/* + * On-disk logical cluster type: + * 0 - literal (uncompressed) lcluster + * 1,3 - compressed lcluster (for HEAD lclusters) + * 2 - compressed lcluster (for NONHEAD lclusters) + * + * In detail, + * 0 - literal (uncompressed) lcluster, + * di_advise = 0 + * di_clusterofs = the literal data offset of the lcluster + * di_blkaddr = the blkaddr of the literal pcluster + * + * 1,3 - compressed lcluster (for HEAD lclusters) + * di_advise = 1 or 3 + * di_clusterofs = the decompressed data offset of the lcluster + * di_blkaddr = the blkaddr of the compressed pcluster + * + * 2 - compressed lcluster (for NONHEAD lclusters) + * di_advise = 2 + * di_clusterofs = + * the decompressed data offset in its own HEAD lcluster + * di_u.delta[0] = distance to this HEAD lcluster + * di_u.delta[1] = distance to the next HEAD lcluster + */ +enum { + Z_EROFS_LCLUSTER_TYPE_PLAIN = 0, + Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1, + Z_EROFS_LCLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_LCLUSTER_TYPE_HEAD2 = 3, + Z_EROFS_LCLUSTER_TYPE_MAX +}; + +#define Z_EROFS_LI_LCLUSTER_TYPE_BITS 2 +#define Z_EROFS_LI_LCLUSTER_TYPE_BIT 0 + +/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ +#define Z_EROFS_LI_PARTIAL_REF (1 << 15) + +/* + * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the + * compressed block count of a compressed extent (in logical clusters, aka. + * block count of a pcluster). 
+ */ +#define Z_EROFS_LI_D0_CBLKCNT (1 << 11) + +struct z_erofs_lcluster_index { + __le16 di_advise; + /* where to decompress in the head lcluster */ + __le16 di_clusterofs; + + union { + /* for the HEAD lclusters */ + __le32 blkaddr; + /* + * for the NONHEAD lclusters + * [0] - distance to its HEAD lcluster + * [1] - distance to the next HEAD lcluster + */ + __le16 delta[2]; + } di_u; +}; + +#define Z_EROFS_FULL_INDEX_ALIGN(end) \ + (ALIGN(end, 8) + sizeof(struct z_erofs_map_header) + 8) + +/* check the EROFS on-disk layout strictly at compile time */ +static inline void erofs_check_ondisk_layout_definitions(void) +{ + const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) { + .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT + }; + + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); + BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); + BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); + BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_lcluster_index) != 8); + BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + /* keep in sync between 2 index structures for better extendibility */ + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != + sizeof(struct z_erofs_lcluster_index)); + BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); + + BUILD_BUG_ON(BIT(Z_EROFS_LI_LCLUSTER_TYPE_BITS) < + Z_EROFS_LCLUSTER_TYPE_MAX - 1); + /* exclude old compiler versions like gcc 7.5.0 */ + BUILD_BUG_ON(__builtin_constant_p(fmh) ? 
+ fmh != cpu_to_le64(1ULL << 63) : 0); +} + +#endif diff --git a/crates/composefs-capi/include/libcomposefs/private/erofs_fs_wrapper.h b/crates/composefs-capi/include/libcomposefs/private/erofs_fs_wrapper.h new file mode 100644 index 00000000..600bb876 --- /dev/null +++ b/crates/composefs-capi/include/libcomposefs/private/erofs_fs_wrapper.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later OR Apache-2.0 */ + +#include +#include + +#define __packed __attribute__((__packed__)) +typedef __u8 u8; + +static inline __u16 cpu_to_le16(__u16 val) +{ + return htole16(val); +} + +static inline __u32 cpu_to_le32(__u32 val) +{ + return htole32(val); +} + +static inline __u64 cpu_to_le64(__u64 val) +{ + return htole64(val); +} + +static inline __u16 le16_to_cpu(__u16 val) +{ + return le16toh(val); +} + +static inline __u32 le32_to_cpu(__u32 val) +{ + return le32toh(val); +} + +static inline __u64 le64_to_cpu(__u64 val) +{ + return le64toh(val); +} + +/* Note: These only do power of 2 */ +#ifndef round_up +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) +#endif + +#ifndef ALIGN_TO +#define ALIGN_TO(_offset, _align_size) \ + (((_offset) + _align_size - 1) & ~(_align_size - 1)) +#endif + +#define BIT(nr) (((uint64_t) 1) << (nr)) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)])) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + +/* We use a fixed block size for all arches of 4k */ +#define EROFS_BLKSIZ 4096 +#define EROFS_BLKSIZ_BITS 12 + +#define EROFS_ISLOTBITS 5 +#define EROFS_SLOTSIZE (1U << EROFS_ISLOTBITS) + +#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 + +#define CRC32C_POLY_LE 0x82F63B78 +static inline uint32_t erofs_crc32c(uint32_t crc, const uint8_t *in, size_t len) +{ + int i; + + while (len--) { + crc ^= *in++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRC32C_POLY_LE : 0); + } + return crc; +} + +enum { + EROFS_FT_UNKNOWN, + EROFS_FT_REG_FILE, + EROFS_FT_DIR, + EROFS_FT_CHRDEV, + EROFS_FT_BLKDEV, + EROFS_FT_FIFO, + EROFS_FT_SOCK, + EROFS_FT_SYMLINK, + EROFS_FT_MAX +}; + +#define ilog2(n) \ +( \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 
6 : \
+	(n) & (1ULL << 5) ? 5 : \
+	(n) & (1ULL << 4) ? 4 : \
+	(n) & (1ULL << 3) ? 3 : \
+	(n) & (1ULL << 2) ? 2 : \
+	(n) & (1ULL << 1) ? 1 : 0 \
+)
+
+#include "erofs_fs.h"
diff --git a/crates/composefs-capi/install.sh b/crates/composefs-capi/install.sh
new file mode 100755
index 00000000..11ef5b95
--- /dev/null
+++ b/crates/composefs-capi/install.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Install the Rust-built libcomposefs (shared library, optional static
+# library, public headers and pkg-config file).
+#
+# Usage: install.sh [PREFIX]          (PREFIX defaults to /usr/local)
+# Environment overrides: LIBDIR, INCLUDEDIR, PKGCONFIGDIR, PROFILE,
+# CARGO_TARGET_DIR.
+set -eu
+
+PREFIX="${1:-/usr/local}"
+LIBDIR="${LIBDIR:-${PREFIX}/lib}"
+INCLUDEDIR="${INCLUDEDIR:-${PREFIX}/include}"
+PKGCONFIGDIR="${PKGCONFIGDIR:-${LIBDIR}/pkgconfig}"
+
+VERSION="1.4.0"
+SOVERSION="1"
+
+# Find the built library
+PROFILE="${PROFILE:-release}"
+TARGETDIR="${CARGO_TARGET_DIR:-$(cd "$(dirname "$0")/../.." && pwd)/target}"
+SOFILE="${TARGETDIR}/${PROFILE}/libcomposefs_capi.so"
+AFILE="${TARGETDIR}/${PROFILE}/libcomposefs_capi.a"
+
+if [ ! -f "$SOFILE" ]; then
+    # Suggest the cargo invocation that actually populates
+    # ${TARGETDIR}/${PROFILE}; a fixed "--release" hint is wrong whenever
+    # PROFILE selects a different output directory.
+    case "$PROFILE" in
+        release) BUILD_HINT="cargo build --release -p composefs-capi" ;;
+        debug) BUILD_HINT="cargo build -p composefs-capi" ;;
+        *) BUILD_HINT="cargo build --profile ${PROFILE} -p composefs-capi" ;;
+    esac
+    echo "error: $SOFILE not found. Run '${BUILD_HINT}' first." >&2
+    exit 1
+fi
+
+install -d "${LIBDIR}" "${INCLUDEDIR}/libcomposefs" "${PKGCONFIGDIR}"
+
+# Shared library with soname symlinks
+install -m 755 "$SOFILE" "${LIBDIR}/libcomposefs.so.${VERSION}"
+ln -sf "libcomposefs.so.${VERSION}" "${LIBDIR}/libcomposefs.so.${SOVERSION}"
+ln -sf "libcomposefs.so.${SOVERSION}" "${LIBDIR}/libcomposefs.so"
+
+# Static library (optional: only installed when cargo produced one)
+if [ -f "$AFILE" ]; then
+    install -m 644 "$AFILE" "${LIBDIR}/libcomposefs.a"
+fi
+
+# Public headers only (private/ subdirectory is not installed)
+SCRIPTDIR="$(cd "$(dirname "$0")" && pwd)"
+for h in "${SCRIPTDIR}"/include/libcomposefs/*.h; do
+    install -m 644 "$h" "${INCLUDEDIR}/libcomposefs/"
+done
+
+# pkg-config: fill in the @...@ placeholders of the template
+sed -e "s|@PREFIX@|${PREFIX}|g" \
+    -e "s|@LIBDIR@|${LIBDIR}|g" \
+    -e "s|@INCLUDEDIR@|${INCLUDEDIR}|g" \
+    -e "s|@VERSION@|${VERSION}|g" \
+    "${SCRIPTDIR}/composefs.pc.in" > "${PKGCONFIGDIR}/composefs.pc"
+
+echo "Installed libcomposefs ${VERSION} to ${PREFIX}"
diff --git a/crates/composefs-capi/src/convert.rs b/crates/composefs-capi/src/convert.rs
new file
mode 100644 index 00000000..ecb1a655 --- /dev/null +++ b/crates/composefs-capi/src/convert.rs @@ -0,0 +1,325 @@ +use std::collections::HashMap; +use std::ffi::{CStr, CString, OsStr, OsString}; +use std::os::unix::ffi::OsStrExt; +use std::ptr; + +use zerocopy::{FromBytes, IntoBytes}; + +use composefs::fsverity::{FsVerityHashValue, Sha256HashValue}; +use composefs::generic_tree::{self, LeafId}; +use composefs::tree::{self, RegularFile}; + +use crate::node::lcfs_node_ref; +use crate::{FfiNode, FfiXattr}; + +fn stat_from_ffi(node: &FfiNode) -> generic_tree::Stat { + let mut xattrs = std::collections::BTreeMap::new(); + for xattr in unsafe { node.xattrs_slice() } { + let key = OsStr::from_bytes(unsafe { xattr.key_cstr() }.to_bytes()); + xattrs.insert(Box::from(key), Box::from(unsafe { xattr.value_bytes() })); + } + generic_tree::Stat { + st_mode: node.inode.st_mode, + st_uid: node.inode.st_uid, + st_gid: node.inode.st_gid, + st_mtim_sec: node.inode.st_mtim_sec, + st_mtim_nsec: node.inode.st_mtim_nsec, + xattrs, + } +} + +fn stat_to_ffi(stat: &generic_tree::Stat, node: &mut FfiNode) { + node.inode.st_mode = stat.st_mode; + node.inode.st_uid = stat.st_uid; + node.inode.st_gid = stat.st_gid; + node.inode.st_mtim_sec = stat.st_mtim_sec; + node.inode.st_mtim_nsec = stat.st_mtim_nsec; + let xattr_vec: Vec = stat + .xattrs + .iter() + .map(|(k, v)| { + let key = CString::new(k.as_bytes()).unwrap_or_else(|_| { + CString::new( + k.as_bytes() + .iter() + .copied() + .filter(|&b| b != 0) + .collect::>(), + ) + .unwrap() + }); + unsafe { FfiXattr::new(&key, v) } + }) + .collect(); + unsafe { + node.xattrs_put_back(xattr_vec); + } +} + +/// Convert an FfiNode tree into a composefs FileSystem. +/// +/// Hardlinked nodes (those with link_to set) will share the same LeafId. 
+pub(crate) fn ffi_tree_to_filesystem( + root: &FfiNode, +) -> anyhow::Result> { + let root_stat = stat_from_ffi(root); + let mut fs = tree::FileSystem::new(root_stat); + let mut hardlink_map: HashMap = HashMap::new(); + + ffi_dir_to_fs(root, &mut fs.root, &mut fs.leaves, &mut hardlink_map)?; + Ok(fs) +} + +fn ffi_dir_to_fs( + ffi_dir: &FfiNode, + dir: &mut tree::Directory, + leaves: &mut Vec>, + hardlink_map: &mut HashMap, +) -> anyhow::Result<()> { + for &child_ptr in unsafe { ffi_dir.children_slice() } { + let child = unsafe { &*child_ptr }; + let child_name = if !child.name.is_null() { + OsStr::from_bytes(unsafe { CStr::from_ptr(child.name) }.to_bytes()) + } else { + OsStr::new("") + }; + + let file_type = child.inode.st_mode & libc::S_IFMT; + + if file_type == libc::S_IFDIR { + let child_stat = stat_from_ffi(child); + let mut subdir = tree::Directory::new(child_stat); + ffi_dir_to_fs(child, &mut subdir, leaves, hardlink_map)?; + dir.insert(child_name, generic_tree::Inode::Directory(Box::new(subdir))); + } else { + let target_ptr = if !child.link_to.is_null() { + child.link_to as usize + } else { + child_ptr as usize + }; + + if let Some(&existing_id) = hardlink_map.get(&target_ptr) { + dir.insert(child_name, generic_tree::Inode::leaf(existing_id)); + continue; + } + + let actual = if !child.link_to.is_null() { + unsafe { &*child.link_to } + } else { + child + }; + + let leaf_content = ffi_node_to_leaf_content(actual)?; + let leaf_stat = stat_from_ffi(actual); + let leaf_id = LeafId(leaves.len()); + leaves.push(tree::Leaf { + stat: leaf_stat, + content: leaf_content, + }); + hardlink_map.insert(target_ptr, leaf_id); + dir.insert(child_name, generic_tree::Inode::leaf(leaf_id)); + } + } + Ok(()) +} + +fn ffi_node_to_leaf_content(node: &FfiNode) -> anyhow::Result> { + let file_type = node.inode.st_mode & libc::S_IFMT; + match file_type { + t if t == libc::S_IFREG => { + if !node.content.is_null() { + let data = unsafe { + std::slice::from_raw_parts(node.content, 
node.inode.st_size as usize) + }; + Ok(generic_tree::LeafContent::Regular(RegularFile::Inline( + Box::from(data), + ))) + } else if node.digest_set { + let digest = + Sha256HashValue::read_from_bytes(&node.digest).expect("digest size mismatch"); + Ok(generic_tree::LeafContent::Regular(RegularFile::External( + digest, + node.inode.st_size, + ))) + } else if !node.payload.is_null() { + let payload = unsafe { CStr::from_ptr(node.payload) }; + let raw = payload.to_bytes(); + let path = raw.strip_suffix(b".file").unwrap_or(raw); + let digest = Sha256HashValue::from_object_pathname(path) + .map_err(|e| anyhow::anyhow!("invalid digest path: {e}"))?; + Ok(generic_tree::LeafContent::Regular( + RegularFile::ExternalNoVerity(digest, node.inode.st_size), + )) + } else if node.inode.st_size > 0 { + Ok(generic_tree::LeafContent::Regular(RegularFile::Sparse( + node.inode.st_size, + ))) + } else { + Ok(generic_tree::LeafContent::Regular(RegularFile::Inline( + Box::new([]), + ))) + } + } + t if t == libc::S_IFLNK => { + let target = if !node.payload.is_null() { + let p = unsafe { CStr::from_ptr(node.payload) }; + OsString::from(OsStr::from_bytes(p.to_bytes())) + } else { + OsString::new() + }; + Ok(generic_tree::LeafContent::Symlink( + target.into_boxed_os_str(), + )) + } + t if t == libc::S_IFBLK => Ok(generic_tree::LeafContent::BlockDevice( + node.inode.st_rdev as u64, + )), + t if t == libc::S_IFCHR => Ok(generic_tree::LeafContent::CharacterDevice( + node.inode.st_rdev as u64, + )), + t if t == libc::S_IFIFO => Ok(generic_tree::LeafContent::Fifo), + t if t == libc::S_IFSOCK => Ok(generic_tree::LeafContent::Socket), + _ => anyhow::bail!("unknown file type: {:#o}", file_type), + } +} + +/// Convert a composefs FileSystem into an FfiNode tree. +/// +/// The returned pointer is a newly allocated root node with ref_count=1. +/// The caller is responsible for calling lcfs_node_unref on it. 
+pub(crate) fn filesystem_to_ffi_tree(fs: &tree::FileSystem) -> *mut FfiNode { + let mut root = Box::new(FfiNode::default()); + stat_to_ffi(&fs.root.stat, &mut root); + root.inode.st_mode |= libc::S_IFDIR; + + // Map LeafId -> *mut FfiNode for hardlink tracking + let mut leaf_node_map: HashMap = HashMap::new(); + let nlinks = fs.nlinks(); + + let root_ptr = Box::into_raw(root); + + fs_dir_to_ffi(&fs.root, &fs.leaves, &nlinks, root_ptr, &mut leaf_node_map); + + root_ptr +} + +fn fs_dir_to_ffi( + dir: &tree::Directory, + leaves: &[tree::Leaf], + nlinks: &[u32], + parent: *mut FfiNode, + leaf_node_map: &mut HashMap, +) { + for (name, inode) in dir.sorted_entries() { + match inode { + generic_tree::Inode::Directory(subdir) => { + let mut child = Box::new(FfiNode::default()); + stat_to_ffi(&subdir.stat, &mut child); + child.inode.st_mode = (child.inode.st_mode & !libc::S_IFMT) | libc::S_IFDIR; + let name_bytes = name.as_bytes(); + child.name = CString::new(name_bytes).map_or(ptr::null_mut(), CString::into_raw); + child.parent = parent; + + let child_ptr = Box::into_raw(child); + fs_dir_to_ffi(subdir, leaves, nlinks, child_ptr, leaf_node_map); + unsafe { + let mut children = (*parent).children_as_vec(); + children.push(child_ptr); + (*parent).children_put_back(children); + } + } + generic_tree::Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let is_hardlink = nlinks[leaf_id.0] > 1; + + if is_hardlink && let Some(&existing_ptr) = leaf_node_map.get(&leaf_id.0) { + let mut link_node = Box::new(FfiNode::default()); + let name_bytes = name.as_bytes(); + link_node.name = + CString::new(name_bytes).map_or(ptr::null_mut(), CString::into_raw); + link_node.parent = parent; + link_node.link_to = unsafe { lcfs_node_ref(existing_ptr) }; + + let link_ptr = Box::into_raw(link_node); + unsafe { + let mut children = (*parent).children_as_vec(); + children.push(link_ptr); + (*parent).children_put_back(children); + } + continue; + } + + let mut child = 
Box::new(FfiNode::default()); + stat_to_ffi(&leaf.stat, &mut child); + leaf_content_to_ffi(&leaf.content, &mut child); + let name_bytes = name.as_bytes(); + child.name = CString::new(name_bytes).map_or(ptr::null_mut(), CString::into_raw); + child.parent = parent; + child.inode.st_nlink = nlinks[leaf_id.0]; + + let child_ptr = Box::into_raw(child); + + if is_hardlink { + leaf_node_map.insert(leaf_id.0, child_ptr); + } + + unsafe { + let mut children = (*parent).children_as_vec(); + children.push(child_ptr); + (*parent).children_put_back(children); + } + } + } + } +} + +fn leaf_content_to_ffi(content: &tree::LeafContent, node: &mut FfiNode) { + match content { + generic_tree::LeafContent::Regular(reg) => { + node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFREG; + match reg { + RegularFile::Inline(data) => { + node.inode.st_size = data.len() as u64; + unsafe { + node.set_content_buf(data); + } + } + RegularFile::External(digest, size) => { + node.inode.st_size = *size; + node.digest.copy_from_slice(digest.as_bytes()); + node.digest_set = true; + let path = digest.to_object_pathname(); + node.payload = CString::new(path).map_or(ptr::null_mut(), CString::into_raw); + } + RegularFile::ExternalNoVerity(digest, size) => { + node.inode.st_size = *size; + let path = digest.to_object_pathname(); + node.payload = CString::new(path).map_or(ptr::null_mut(), CString::into_raw); + } + RegularFile::Sparse(size) => { + node.inode.st_size = *size; + } + } + } + generic_tree::LeafContent::Symlink(target) => { + node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFLNK; + node.inode.st_size = target.len() as u64; + node.payload = + CString::new(target.as_bytes()).map_or(ptr::null_mut(), CString::into_raw); + } + generic_tree::LeafContent::BlockDevice(rdev) => { + node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFBLK; + node.inode.st_rdev = *rdev as u32; + } + generic_tree::LeafContent::CharacterDevice(rdev) => { + 
node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFCHR; + node.inode.st_rdev = *rdev as u32; + } + generic_tree::LeafContent::Fifo => { + node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFIFO; + } + generic_tree::LeafContent::Socket => { + node.inode.st_mode = (node.inode.st_mode & !libc::S_IFMT) | libc::S_IFSOCK; + } + } +} diff --git a/crates/composefs-capi/src/errno.rs b/crates/composefs-capi/src/errno.rs new file mode 100644 index 00000000..fb98c519 --- /dev/null +++ b/crates/composefs-capi/src/errno.rs @@ -0,0 +1,3 @@ +pub(crate) fn set_errno(err: i32) { + unsafe { *libc::__errno_location() = err }; +} diff --git a/crates/composefs-capi/src/fsverity.rs b/crates/composefs-capi/src/fsverity.rs new file mode 100644 index 00000000..113eb004 --- /dev/null +++ b/crates/composefs-capi/src/fsverity.rs @@ -0,0 +1,308 @@ +use std::ffi::{CString, c_void}; +use std::io::{Read, Seek}; +use std::mem::ManuallyDrop; +use std::os::fd::{BorrowedFd, FromRawFd}; +use std::ptr; + +use libc::{c_int, ssize_t}; +use zerocopy::IntoBytes; + +use composefs::fsverity::{self, EnableVerityError, MeasureVerityError, Sha256HashValue}; + +use crate::errno::set_errno; +use crate::{FfiNode, LCFS_DIGEST_SIZE}; + +type LcfsReadCb = unsafe extern "C" fn(*mut c_void, *mut c_void, usize) -> ssize_t; + +fn copy_hash_to_digest(hash: &Sha256HashValue, digest: *mut u8) { + let bytes = hash.as_bytes(); + unsafe { + std::ptr::copy_nonoverlapping(bytes.as_ptr(), digest, LCFS_DIGEST_SIZE); + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_compute_fsverity_from_data( + digest: *mut u8, + data: *mut u8, + data_len: usize, +) -> c_int { + if digest.is_null() || data.is_null() { + set_errno(libc::EINVAL); + return -1; + } + + let input = unsafe { std::slice::from_raw_parts(data, data_len) }; + let hash = fsverity::compute_verity::(input); + copy_hash_to_digest(&hash, digest); + 0 +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn 
lcfs_compute_fsverity_from_fd(digest: *mut u8, fd: c_int) -> c_int {
+    if digest.is_null() || fd < 0 {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    // Match C behavior: read from the current offset position, do not seek.
+    // ManuallyDrop keeps the temporary `File` from closing the caller's fd.
+    let mut file = ManuallyDrop::new(unsafe { std::fs::File::from_raw_fd(fd) });
+    let mut data = Vec::new();
+    if file.read_to_end(&mut data).is_err() {
+        set_errno(libc::EIO);
+        return -1;
+    }
+
+    let hash = fsverity::compute_verity::<Sha256HashValue>(&data);
+    copy_hash_to_digest(&hash, digest);
+    0
+}
+
+/// Computes the fs-verity digest of a stream pulled through `read_cb`.
+///
+/// `read_cb(file, buf, len)` is called repeatedly with a 64 KiB scratch
+/// buffer until it returns 0. All data is buffered in memory and then hashed;
+/// `LCFS_DIGEST_SIZE` bytes are written to `digest`.
+///
+/// Returns 0 on success. Returns -1 with errno `EINVAL` when `digest` is
+/// null, or with errno `EIO` when the callback reports an error (negative
+/// return) or claims to have produced more bytes than the buffer can hold.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_compute_fsverity_from_content(
+    digest: *mut u8,
+    file: *mut c_void,
+    read_cb: LcfsReadCb,
+) -> c_int {
+    if digest.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let mut data = Vec::new();
+    let mut buf = [0u8; 65536];
+    loop {
+        let n = unsafe { read_cb(file, buf.as_mut_ptr() as *mut c_void, buf.len()) };
+        if n < 0 {
+            set_errno(libc::EIO);
+            return -1;
+        }
+        if n == 0 {
+            break;
+        }
+        // The count comes from foreign code: never trust it to index `buf`.
+        // An out-of-range slice would panic, and a panic inside an
+        // `extern "C"` function aborts the whole process.
+        let n = n as usize;
+        if n > buf.len() {
+            set_errno(libc::EIO);
+            return -1;
+        }
+        data.extend_from_slice(&buf[..n]);
+    }
+
+    let hash = fsverity::compute_verity::<Sha256HashValue>(&data);
+    copy_hash_to_digest(&hash, digest);
+    0
+}
+
+/// errno value used to report "no fs-verity digest available" (an alias for
+/// `ENOTTY`).
+const ENOVERITY: c_int = libc::ENOTTY;
+
+/// Maps a `MeasureVerityError` to the errno value reported to C callers.
+///
+/// Missing verity, an unsupported filesystem, and the OS errors `ENODATA`,
+/// `EOPNOTSUPP` and `ENOTTY` are all canonicalized to `ENOVERITY`. Other I/O
+/// errors keep their OS error code (`EIO` when none is available), and any
+/// remaining variant becomes `EIO`.
+fn measure_error_to_errno(e: &MeasureVerityError) -> c_int {
+    match e {
+        MeasureVerityError::VerityMissing | MeasureVerityError::FilesystemNotSupported => ENOVERITY,
+        MeasureVerityError::Io(io_err) => {
+            let raw = io_err.raw_os_error().unwrap_or(libc::EIO);
+            if raw == libc::ENODATA || raw == libc::EOPNOTSUPP || raw == libc::ENOTTY {
+                ENOVERITY
+            } else {
+                raw
+            }
+        }
+        _ => libc::EIO,
+    }
+}
+
+/// Measures the fs-verity digest of `fd` and writes it to `digest`.
+///
+/// Returns 0 on success. Invalid arguments return -1 with errno `EINVAL`;
+/// measurement failures set errno (see `measure_error_to_errno`) and return
+/// the negated errno value.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_fd_measure_fsverity(digest: *mut u8, fd: c_int) -> c_int {
+    if digest.is_null() || fd < 0 {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let borrowed = unsafe { BorrowedFd::borrow_raw(fd) };
+    match fsverity::measure_verity::<Sha256HashValue>(borrowed) {
+        Ok(hash) => {
+            copy_hash_to_digest(&hash, digest);
+            0
+        }
+        Err(ref e) => {
+            let err = measure_error_to_errno(e);
+            set_errno(err);
+            -(err)
+        }
+    }
+}
+
+/// Reads the fs-verity digest of `fd` into `digest`, if the file has one.
+///
+/// Returns 0 on success. Invalid arguments return -1 with errno `EINVAL`.
+/// A file without a digest sets errno to `ENODATA` and returns `-ENODATA`;
+/// other failures set errno via `measure_error_to_errno` and return the
+/// negated errno value.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_fd_get_fsverity(digest: *mut u8, fd: c_int) -> c_int {
+    if digest.is_null() || fd < 0 {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let borrowed = unsafe { BorrowedFd::borrow_raw(fd) };
+    match fsverity::measure_verity_opt::<Sha256HashValue>(borrowed) {
+        Ok(Some(hash)) => {
+            copy_hash_to_digest(&hash, digest);
+            0
+        }
+        Ok(None) => {
+            set_errno(libc::ENODATA);
+            -(libc::ENODATA)
+        }
+        Err(ref e) => {
+            let err = measure_error_to_errno(e);
+            set_errno(err);
+            -(err)
+        }
+    }
+}
+
+/// Enables fs-verity on `fd`.
+///
+/// Returns 0 on success and also when verity was already enabled. A negative
+/// `fd` returns -1 with errno `EINVAL`. I/O failures set errno to the OS
+/// error (`EIO` when none is available) and return its negation; any other
+/// failure returns -1 with errno `ENOTSUP`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_fd_enable_fsverity(fd: c_int) -> c_int {
+    if fd < 0 {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let borrowed = unsafe { BorrowedFd::borrow_raw(fd) };
+    match fsverity::enable_verity_raw::<Sha256HashValue>(borrowed) {
+        Ok(()) => 0,
+        Err(EnableVerityError::AlreadyEnabled) => 0,
+        Err(EnableVerityError::Io(ref io_err)) => {
+            let errno_val = io_err.raw_os_error().unwrap_or(libc::EIO);
+            // Publish the error through errno as well, like every other
+            // failure path in this file; the thread's errno may have been
+            // overwritten since the failing syscall.
+            set_errno(errno_val);
+            -(errno_val)
+        }
+        Err(_) => {
+            set_errno(libc::ENOTSUP);
+            -1
+        }
+    }
+}
+
+/// Computes the fs-verity digest of the stream read through `read_cb` and
+/// stores it on `node` (`digest` plus `digest_set = true`).
+///
+/// Returns 0 on success, -1 with errno `EINVAL` when `node` is null, or the
+/// negative result of `lcfs_compute_fsverity_from_content` (node unchanged).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_fsverity_from_content(
+    node: *mut FfiNode,
+    file: *mut c_void,
+    read_cb: LcfsReadCb,
+) -> c_int {
+    if node.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let mut digest = [0u8; LCFS_DIGEST_SIZE];
+    let ret = unsafe { lcfs_compute_fsverity_from_content(digest.as_mut_ptr(), file, read_cb) };
+    if ret < 0 {
+        return ret;
+    }
+
+    unsafe {
+        (*node).digest = digest;
+        (*node).digest_set = true;
+    }
+    0
+}
+
+/// Computes the fs-verity digest of the data readable from `fd` and stores it
+/// on `node` (`digest` plus `digest_set = true`).
+///
+/// Returns 0 on success, -1 with errno `EINVAL` when `node` is null, or the
+/// negative result of `lcfs_compute_fsverity_from_fd` (node unchanged).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_fsverity_from_fd(node: *mut FfiNode, fd: c_int) -> c_int {
+    if node.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    let mut digest = [0u8; LCFS_DIGEST_SIZE];
+    let ret = unsafe { lcfs_compute_fsverity_from_fd(digest.as_mut_ptr(), fd) };
+    if ret < 0 {
+        return ret;
+    }
+
+    unsafe {
+        (*node).digest = digest;
+        (*node).digest_set = true;
+    }
+    0
+}
+
+const
LCFS_BUILD_COMPUTE_DIGEST: c_int = 1 << 3; +const LCFS_BUILD_NO_INLINE: c_int = 1 << 4; +const LCFS_BUILD_BY_DIGEST: c_int = 1 << 6; +const LCFS_RECOMMENDED_INLINE_CONTENT_MAX: u64 = 64; + +fn digest_to_path(digest: &[u8; LCFS_DIGEST_SIZE]) -> String { + use std::fmt::Write; + let mut path = String::with_capacity(LCFS_DIGEST_SIZE * 2 + 1); + for (i, &byte) in digest.iter().enumerate() { + if i == 1 { + path.push('/'); + } + let _ = write!(path, "{byte:02x}"); + } + path +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_from_content( + node: *mut FfiNode, + dirfd: c_int, + fname: *const libc::c_char, + buildflags: c_int, +) -> c_int { + if node.is_null() || fname.is_null() { + set_errno(libc::EINVAL); + return -1; + } + + unsafe { + let compute_digest = (buildflags & LCFS_BUILD_COMPUTE_DIGEST) != 0; + let by_digest = (buildflags & LCFS_BUILD_BY_DIGEST) != 0; + let no_inline = (buildflags & LCFS_BUILD_NO_INLINE) != 0; + let is_zerosized = (*node).inode.st_size == 0; + let do_digest = !is_zerosized && (compute_digest || by_digest); + let do_inline = !is_zerosized + && !no_inline + && (*node).inode.st_size <= LCFS_RECOMMENDED_INLINE_CONTENT_MAX; + + if !do_digest && !do_inline { + return 0; + } + + let fd = libc::openat(dirfd, fname, libc::O_RDONLY | libc::O_CLOEXEC); + if fd < 0 { + return -1; + } + + let mut file = std::fs::File::from_raw_fd(fd); + + if do_digest { + let mut data = Vec::new(); + if file.read_to_end(&mut data).is_err() { + set_errno(libc::EIO); + return -1; + } + + let hash = fsverity::compute_verity::(&data); + (*node).digest.copy_from_slice(hash.as_bytes()); + (*node).digest_set = true; + + if by_digest { + let path = digest_to_path(&(*node).digest); + (*node).free_payload(); + (*node).payload = CString::new(path).map_or(ptr::null_mut(), CString::into_raw); + + if !compute_digest { + (*node).digest_set = false; + } + } + + if do_inline && file.seek(std::io::SeekFrom::Start(0)).is_err() { + set_errno(libc::EIO); + return -1; + } + } 
+ + if do_inline { + let mut buf = vec![0u8; (*node).inode.st_size as usize]; + if file.read_exact(&mut buf).is_err() { + set_errno(libc::EIO); + return -1; + } + (*node).set_content_buf(&buf); + } + + 0 + } +} diff --git a/crates/composefs-capi/src/image.rs b/crates/composefs-capi/src/image.rs new file mode 100644 index 00000000..2e6d69e6 --- /dev/null +++ b/crates/composefs-capi/src/image.rs @@ -0,0 +1,476 @@ +use std::ffi::{CStr, c_char, c_int, c_void}; +use std::io::{Read, Seek}; +use std::mem::{ManuallyDrop, MaybeUninit}; +use std::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd}; +use std::ptr; + +use libc::size_t; +use rustix::fs::{AtFlags, FileType, Mode, OFlags, RawDir, openat, readlinkat, statat}; +use zerocopy::IntoBytes; + +use crate::FfiNode; +use crate::convert::{ffi_tree_to_filesystem, filesystem_to_ffi_tree}; +use crate::errno::set_errno; +use composefs::erofs::format::FormatVersion; +use composefs::erofs::reader::erofs_to_filesystem; +use composefs::erofs::writer::{ValidatedFileSystem, mkfs_erofs_versioned}; +use composefs::fsverity::Sha256HashValue; + +// C callback types +type LcfsWriteCb = unsafe extern "C" fn(*mut c_void, *mut c_void, size_t) -> isize; +// C struct layout from lcfs-writer.h +#[repr(C)] +pub struct LcfsWriteOptions { + pub format: u32, + pub version: u32, + pub flags: u32, + pub digest_out: *mut u8, + pub file: *mut c_void, + pub file_write_cb: Option, + pub max_version: u32, + pub reserved: [u32; 3], + pub reserved2: [*mut c_void; 4], +} + +#[repr(C)] +pub struct LcfsReadOptions { + pub toplevel_entries: *const *const c_char, + pub reserved: [u32; 3], + pub reserved2: [*mut c_void; 4], +} + +const LCFS_BUILD_COMPUTE_DIGEST: u32 = 1 << 3; +const LCFS_VERSION_MAX: u32 = 1; +// All currently defined flags. The C library defines LCFS_FLAGS_MASK = 0 +// (no flags yet), so any non-zero flags value is invalid. 
+const LCFS_FLAGS_MASK: u32 = 0; + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_load_node_from_image( + image_data: *const u8, + image_data_size: size_t, +) -> *mut FfiNode { + unsafe { lcfs_load_node_from_image_ext(image_data, image_data_size, ptr::null()) } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_load_node_from_image_ext( + image_data: *const u8, + image_data_size: size_t, + options: *const LcfsReadOptions, +) -> *mut FfiNode { + if image_data.is_null() || image_data_size == 0 { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + + let data = unsafe { std::slice::from_raw_parts(image_data, image_data_size) }; + + let fs = match erofs_to_filesystem::(data) { + Ok(fs) => fs, + Err(_) => { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + }; + + let root = filesystem_to_ffi_tree(&fs); + + // Apply toplevel_entries filter if specified + if !options.is_null() { + unsafe { + let opts = &*options; + if !opts.toplevel_entries.is_null() { + filter_toplevel(root, opts.toplevel_entries); + } + } + } + + root +} + +unsafe fn filter_toplevel(root: *mut FfiNode, entries: *const *const c_char) { + if root.is_null() || entries.is_null() { + return; + } + + unsafe { + // Collect allowed names + let mut allowed = Vec::new(); + let mut p = entries; + while !(*p).is_null() { + allowed.push(CStr::from_ptr(*p)); + p = p.add(1); + } + + // Remove children not in the allowed list + let mut children = (*root).children_as_vec(); + let mut i = 0; + while i < children.len() { + let child = children[i]; + let keep = if !(*child).name.is_null() { + let child_name = CStr::from_ptr((*child).name); + allowed.contains(&child_name) + } else { + false + }; + if keep { + i += 1; + } else { + children.remove(i); + (*child).parent = ptr::null_mut(); + crate::node::lcfs_node_unref(child); + } + } + (*root).children_put_back(children); + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_load_node_from_fd(fd: c_int) -> *mut FfiNode { + unsafe { 
lcfs_load_node_from_fd_ext(fd, ptr::null()) } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_load_node_from_fd_ext( + fd: c_int, + options: *const LcfsReadOptions, +) -> *mut FfiNode { + if fd < 0 { + set_errno(libc::EBADF); + return ptr::null_mut(); + } + + let mut file = ManuallyDrop::new(unsafe { std::fs::File::from_raw_fd(fd) }); + if file.seek(std::io::SeekFrom::Start(0)).is_err() { + set_errno(libc::EIO); + return ptr::null_mut(); + } + let mut data = Vec::new(); + if file.read_to_end(&mut data).is_err() { + set_errno(libc::EIO); + return ptr::null_mut(); + } + + unsafe { lcfs_load_node_from_image_ext(data.as_ptr(), data.len(), options) } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_version_from_fd(fd: c_int) -> c_int { + use composefs::erofs::format::{COMPOSEFS_MAGIC, ComposefsHeader}; + use zerocopy::FromBytes; + + if fd < 0 { + set_errno(libc::EBADF); + return -1; + } + + let mut file = ManuallyDrop::new(unsafe { std::fs::File::from_raw_fd(fd) }); + if file.seek(std::io::SeekFrom::Start(0)).is_err() { + set_errno(libc::EIO); + return -1; + } + let mut buf = [0u8; size_of::()]; + if file.read_exact(&mut buf).is_err() { + set_errno(libc::EIO); + return -1; + } + + let header = ComposefsHeader::ref_from_bytes(&buf).unwrap(); + if header.magic != COMPOSEFS_MAGIC || header.version.get() != 1 { + set_errno(libc::EINVAL); + return -1; + } + + header.composefs_version.get() as c_int +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_write_to( + root: *mut FfiNode, + options: *mut LcfsWriteOptions, +) -> c_int { + if root.is_null() || options.is_null() { + set_errno(libc::EINVAL); + return -1; + } + + unsafe { write_to_inner(root, options) } +} + +unsafe fn ffi_tree_has_whiteout(node: *const FfiNode) -> bool { + unsafe { + let n = &*node; + let file_type = n.inode.st_mode & libc::S_IFMT; + if file_type == libc::S_IFCHR && n.inode.st_rdev == 0 { + return true; + } + for &child in n.children_slice() { + if !child.is_null() { + let 
child_ref = &*child; + if !child_ref.link_to.is_null() { + continue; + } + if ffi_tree_has_whiteout(child) { + return true; + } + } + } + false + } +} + +unsafe fn write_to_inner(root: *mut FfiNode, options: *mut LcfsWriteOptions) -> c_int { + unsafe { + let opts = &mut *options; + let root_ref = &*root; + + // Check for unknown flags + if (opts.flags & !LCFS_FLAGS_MASK) != 0 { + set_errno(libc::EINVAL); + return -1; + } + + // Validate version bounds + if opts.version > LCFS_VERSION_MAX || opts.max_version > LCFS_VERSION_MAX { + set_errno(libc::EINVAL); + return -1; + } + + // Clamp max_version up to at least version + if opts.max_version < opts.version { + opts.max_version = opts.version; + } + + // Convert FfiNode tree to FileSystem + let fs = match ffi_tree_to_filesystem(root_ref) { + Ok(fs) => fs, + Err(_) => { + set_errno(libc::EINVAL); + return -1; + } + }; + + // Validate the filesystem + let validated = match ValidatedFileSystem::new(fs) { + Ok(v) => v, + Err(_) => { + set_errno(libc::EINVAL); + return -1; + } + }; + + // C library auto-bumps version from 0 to 1 when the tree contains + // chardev whiteouts (S_IFCHR, rdev=0) and max_version >= 1. 
+ let mut effective_version = opts.version; + if effective_version < 1 && opts.max_version >= 1 && ffi_tree_has_whiteout(root) { + effective_version = 1; + } + + // Write back the effective version so the caller can observe it + opts.version = effective_version; + + let version = match effective_version { + 0 => FormatVersion::V0, + 1 => FormatVersion::V1, + _ => { + // Should not be reachable after bounds check above + set_errno(libc::EINVAL); + return -1; + } + }; + + // Generate the EROFS image + let image_data = mkfs_erofs_versioned(&validated, version); + + // Compute digest if requested + if opts.flags & LCFS_BUILD_COMPUTE_DIGEST != 0 && !opts.digest_out.is_null() { + let digest = composefs::fsverity::compute_verity::(&image_data); + ptr::copy_nonoverlapping(digest.as_bytes().as_ptr(), opts.digest_out, 32); + } + + // Write through callback + if let Some(write_cb) = opts.file_write_cb { + let mut offset = 0; + while offset < image_data.len() { + let remaining = image_data.len() - offset; + let written = write_cb( + opts.file, + image_data[offset..].as_ptr() as *mut c_void, + remaining, + ); + if written < 0 { + set_errno(libc::EIO); + return -1; + } + offset += written as usize; + } + } + + 0 + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_load_node_from_file( + dirfd: c_int, + fname: *const c_char, + _buildflags: c_int, +) -> *mut FfiNode { + if fname.is_null() { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + + unsafe { + let name = CStr::from_ptr(fname); + let dirfd = BorrowedFd::borrow_raw(dirfd); + + let stat = match statat(dirfd, name, AtFlags::SYMLINK_NOFOLLOW) { + Ok(s) => s, + Err(e) => { + set_errno(e.raw_os_error()); + return ptr::null_mut(); + } + }; + + let node = Box::into_raw(Box::new(FfiNode::default())); + (*node).inode.st_mode = stat.st_mode as u32; + (*node).inode.st_uid = stat.st_uid; + (*node).inode.st_gid = stat.st_gid; + (*node).inode.st_nlink = stat.st_nlink as u32; + (*node).inode.st_size = stat.st_size as 
u64; + (*node).inode.st_rdev = stat.st_rdev as u32; + (*node).inode.st_mtim_sec = stat.st_mtime; + (*node).inode.st_mtim_nsec = stat.st_mtime_nsec as u32; + + if FileType::from_raw_mode(stat.st_mode) == FileType::Symlink + && let Ok(target) = readlinkat(dirfd, name, Vec::new()) + { + (*node).payload = target.into_raw(); + } + + node + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_build( + dirfd: c_int, + fname: *const c_char, + buildflags: c_int, + _failed_path_out: *mut *mut c_char, +) -> *mut FfiNode { + if fname.is_null() { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + + unsafe { + let name = CStr::from_ptr(fname); + let dirfd = BorrowedFd::borrow_raw(dirfd); + + let fd = match openat( + dirfd, + name, + OFlags::RDONLY | OFlags::DIRECTORY, + Mode::empty(), + ) { + Ok(fd) => fd, + Err(e) => { + set_errno(e.raw_os_error()); + return ptr::null_mut(); + } + }; + + match build_dir_recursive(fd.as_fd(), buildflags) { + Some(node) => node, + None => { + set_errno(libc::EIO); + ptr::null_mut() + } + } + } +} + +unsafe fn build_dir_recursive(dirfd: BorrowedFd<'_>, buildflags: c_int) -> Option<*mut FfiNode> { + let stat = rustix::fs::fstat(dirfd).ok()?; + + let node = Box::into_raw(Box::new(FfiNode::default())); + unsafe { + (*node).inode.st_mode = stat.st_mode as u32; + (*node).inode.st_uid = stat.st_uid; + (*node).inode.st_gid = stat.st_gid; + (*node).inode.st_nlink = stat.st_nlink as u32; + (*node).inode.st_size = stat.st_size as u64; + (*node).inode.st_rdev = stat.st_rdev as u32; + (*node).inode.st_mtim_sec = stat.st_mtime; + (*node).inode.st_mtim_nsec = stat.st_mtime_nsec as u32; + } + + let mut buf = [MaybeUninit::uninit(); 8192]; + let mut raw_dir = RawDir::new(dirfd, &mut buf); + + while let Some(Ok(entry)) = raw_dir.next() { + let name = entry.file_name(); + if name.to_bytes() == b"." || name.to_bytes() == b".." 
{ + continue; + } + + let name_ptr = name.as_ptr(); + let child = unsafe { lcfs_load_node_from_file(dirfd.as_raw_fd(), name_ptr, buildflags) }; + if child.is_null() { + continue; + } + + unsafe { + if ((*child).inode.st_mode & libc::S_IFMT) == libc::S_IFDIR + && let Ok(child_fd) = openat( + dirfd, + name, + OFlags::RDONLY | OFlags::DIRECTORY, + Mode::empty(), + ) + { + build_dir_children(child_fd.as_fd(), child, buildflags); + } + + crate::node::lcfs_node_add_child(node, child, name_ptr); + } + } + + Some(node) +} + +unsafe fn build_dir_children(dirfd: BorrowedFd<'_>, parent: *mut FfiNode, buildflags: c_int) { + let mut buf = [MaybeUninit::uninit(); 8192]; + let mut raw_dir = RawDir::new(dirfd, &mut buf); + + while let Some(Ok(entry)) = raw_dir.next() { + let name = entry.file_name(); + if name.to_bytes() == b"." || name.to_bytes() == b".." { + continue; + } + + let name_ptr = name.as_ptr(); + let child = unsafe { lcfs_load_node_from_file(dirfd.as_raw_fd(), name_ptr, buildflags) }; + if child.is_null() { + continue; + } + + unsafe { + if ((*child).inode.st_mode & libc::S_IFMT) == libc::S_IFDIR + && let Ok(child_fd) = openat( + dirfd, + name, + OFlags::RDONLY | OFlags::DIRECTORY, + Mode::empty(), + ) + { + build_dir_children(child_fd.as_fd(), child, buildflags); + } + + crate::node::lcfs_node_add_child(parent, child, name_ptr); + } + } +} diff --git a/crates/composefs-capi/src/lib.rs b/crates/composefs-capi/src/lib.rs new file mode 100644 index 00000000..cabd1132 --- /dev/null +++ b/crates/composefs-capi/src/lib.rs @@ -0,0 +1,310 @@ +#![allow(unsafe_code)] +#![allow(clippy::missing_safety_doc)] + +mod convert; +mod errno; +mod fsverity; +mod image; +mod mount; +mod node; + +use std::ffi::{CStr, CString, c_char}; +use std::mem; +use std::ptr; + +use libc::c_int; + +const LCFS_DIGEST_SIZE: usize = 32; + +#[repr(C)] +pub(crate) struct FfiXattr { + key: *mut c_char, + value: *mut c_char, + value_len: u16, + erofs_shared_xattr_offset: i64, +} + +impl FfiXattr { + 
pub(crate) unsafe fn new(key: &CStr, value: &[u8]) -> Self { + let key_ptr = CString::new(key.to_bytes()).unwrap().into_raw(); + let (value_ptr, value_len) = if value.is_empty() { + (ptr::null_mut(), 0u16) + } else { + let boxed: Box<[u8]> = value.into(); + (Box::into_raw(boxed) as *mut c_char, value.len() as u16) + }; + FfiXattr { + key: key_ptr, + value: value_ptr, + value_len, + erofs_shared_xattr_offset: -1, + } + } + + pub(crate) unsafe fn key_cstr(&self) -> &CStr { + unsafe { CStr::from_ptr(self.key) } + } + + pub(crate) unsafe fn value_bytes(&self) -> &[u8] { + if self.value.is_null() || self.value_len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(self.value as *const u8, self.value_len as usize) } + } + } + + pub(crate) unsafe fn set_value(&mut self, value: &[u8]) { + unsafe { + self.free_value(); + } + if value.is_empty() { + self.value = ptr::null_mut(); + self.value_len = 0; + } else { + let boxed: Box<[u8]> = value.into(); + self.value = Box::into_raw(boxed) as *mut c_char; + self.value_len = value.len() as u16; + } + } + + unsafe fn free_value(&mut self) { + if !self.value.is_null() && self.value_len > 0 { + unsafe { + let p = + ptr::slice_from_raw_parts_mut(self.value as *mut u8, self.value_len as usize); + drop(Box::from_raw(p)); + } + self.value = ptr::null_mut(); + self.value_len = 0; + } + } +} + +impl Drop for FfiXattr { + fn drop(&mut self) { + unsafe { + if !self.key.is_null() { + drop(CString::from_raw(self.key)); + } + self.free_value(); + } + } +} + +#[repr(C)] +pub(crate) struct FfiInode { + pub st_mode: u32, + pub st_nlink: u32, + pub st_uid: u32, + pub st_gid: u32, + pub st_rdev: u32, + pub st_size: u64, + pub st_mtim_sec: i64, + pub st_mtim_nsec: u32, +} + +#[repr(C)] +pub(crate) struct FfiNode { + pub(crate) ref_count: c_int, + pub(crate) parent: *mut FfiNode, + pub(crate) children: *mut *mut FfiNode, + pub(crate) children_capacity: usize, + pub(crate) children_size: usize, + pub(crate) link_to: *mut FfiNode, + 
pub(crate) link_to_invalid: bool, + pub(crate) name: *mut c_char, + pub(crate) payload: *mut c_char, + pub(crate) content: *mut u8, + pub(crate) xattrs: *mut FfiXattr, + pub(crate) n_xattrs: usize, + pub(crate) xattr_size: usize, + pub(crate) digest_set: bool, + pub(crate) digest: [u8; LCFS_DIGEST_SIZE], + pub(crate) inode: FfiInode, + next: *mut FfiNode, + in_tree: bool, + inode_num: u32, + erofs_compact: bool, + erofs_ipad: u32, + erofs_xattr_size_field: u32, + erofs_isize: u32, + erofs_nid: u64, + erofs_n_blocks: u32, + erofs_tailsize: u32, +} + +impl FfiNode { + pub(crate) unsafe fn children_as_vec(&mut self) -> Vec<*mut FfiNode> { + if self.children.is_null() { + Vec::new() + } else { + unsafe { + Vec::from_raw_parts(self.children, self.children_size, self.children_capacity) + } + } + } + + pub(crate) unsafe fn children_put_back(&mut self, mut v: Vec<*mut FfiNode>) { + self.children = v.as_mut_ptr(); + self.children_size = v.len(); + self.children_capacity = v.capacity(); + mem::forget(v); + } + + pub(crate) unsafe fn children_slice(&self) -> &[*mut FfiNode] { + if self.children.is_null() || self.children_size == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(self.children, self.children_size) } + } + } + + pub(crate) unsafe fn set_content_buf(&mut self, data: &[u8]) { + unsafe { + self.free_content(); + } + if !data.is_empty() { + let boxed: Box<[u8]> = data.into(); + self.content = Box::into_raw(boxed) as *mut u8; + } + } + + pub(crate) unsafe fn free_content(&mut self) { + if !self.content.is_null() { + let len = self.inode.st_size as usize; + if len > 0 { + unsafe { + let p = ptr::slice_from_raw_parts_mut(self.content, len); + drop(Box::from_raw(p)); + } + } + self.content = ptr::null_mut(); + } + } + + pub(crate) unsafe fn free_name(&mut self) { + if !self.name.is_null() { + unsafe { + drop(CString::from_raw(self.name)); + } + self.name = ptr::null_mut(); + } + } + + pub(crate) unsafe fn free_payload(&mut self) { + if !self.payload.is_null() { 
+ unsafe { + drop(CString::from_raw(self.payload)); + } + self.payload = ptr::null_mut(); + } + } + + pub(crate) unsafe fn xattrs_as_vec(&mut self) -> Vec { + if self.xattrs.is_null() || self.n_xattrs == 0 { + Vec::new() + } else { + unsafe { Vec::from_raw_parts(self.xattrs, self.n_xattrs, self.n_xattrs) } + } + } + + pub(crate) unsafe fn xattrs_put_back(&mut self, mut v: Vec) { + v.shrink_to_fit(); + if v.is_empty() { + self.xattrs = ptr::null_mut(); + self.n_xattrs = 0; + mem::forget(v); + } else { + self.xattrs = v.as_mut_ptr(); + self.n_xattrs = v.len(); + mem::forget(v); + } + } + + pub(crate) unsafe fn xattrs_slice(&self) -> &[FfiXattr] { + if self.xattrs.is_null() || self.n_xattrs == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(self.xattrs, self.n_xattrs) } + } + } + + pub(crate) unsafe fn xattrs_slice_mut(&mut self) -> &mut [FfiXattr] { + if self.xattrs.is_null() || self.n_xattrs == 0 { + &mut [] + } else { + unsafe { std::slice::from_raw_parts_mut(self.xattrs, self.n_xattrs) } + } + } +} + +impl Default for FfiNode { + fn default() -> Self { + FfiNode { + ref_count: 1, + parent: ptr::null_mut(), + children: ptr::null_mut(), + children_capacity: 0, + children_size: 0, + link_to: ptr::null_mut(), + link_to_invalid: false, + name: ptr::null_mut(), + payload: ptr::null_mut(), + content: ptr::null_mut(), + xattrs: ptr::null_mut(), + n_xattrs: 0, + xattr_size: 0, + digest_set: false, + digest: [0u8; LCFS_DIGEST_SIZE], + inode: FfiInode { + st_mode: 0, + st_nlink: 1, + st_uid: 0, + st_gid: 0, + st_rdev: 0, + st_size: 0, + st_mtim_sec: 0, + st_mtim_nsec: 0, + }, + next: ptr::null_mut(), + in_tree: false, + inode_num: 0, + erofs_compact: false, + erofs_ipad: 0, + erofs_xattr_size_field: 0, + erofs_isize: 0, + erofs_nid: 0, + erofs_n_blocks: 0, + erofs_tailsize: 0, + } + } +} + +impl Drop for FfiNode { + fn drop(&mut self) { + unsafe { + if !self.children.is_null() { + // children_size should be 0 after lcfs_node_unref drains them + let v = + 
Vec::from_raw_parts(self.children, self.children_size, self.children_capacity); + drop(v); + } + self.free_name(); + self.free_payload(); + self.free_content(); + if !self.xattrs.is_null() && self.n_xattrs > 0 { + let v = Vec::from_raw_parts(self.xattrs, self.n_xattrs, self.n_xattrs); + drop(v); + } + } + } +} + +const _: () = { + assert!(size_of::() == 32); + assert!(size_of::() == 48); + assert!(size_of::() == 240); +}; + +#[cfg(test)] +mod tests; diff --git a/crates/composefs-capi/src/mount.rs b/crates/composefs-capi/src/mount.rs new file mode 100644 index 00000000..3309ce7e --- /dev/null +++ b/crates/composefs-capi/src/mount.rs @@ -0,0 +1,171 @@ +use std::ffi::{CStr, CString, c_char, c_int}; +use std::os::fd::{AsFd, FromRawFd, OwnedFd}; + +use libc::size_t; +use rustix::fs::{CWD, Mode, OFlags, open}; + +use crate::errno::set_errno; + +#[repr(C)] +pub struct LcfsMountOptions { + pub objdirs: *const *const c_char, + pub n_objdirs: size_t, + pub workdir: *const c_char, + pub upperdir: *const c_char, + pub expected_fsverity_digest: *const c_char, + pub flags: u32, + pub idmap_fd: c_int, + pub image_mountdir: *const c_char, + pub reserved: [u32; 4], + pub reserved2: [*mut std::ffi::c_void; 4], +} + +const LCFS_MOUNT_FLAGS_REQUIRE_VERITY: u32 = 1 << 0; +const LCFS_MOUNT_FLAGS_IDMAP: u32 = 1 << 3; +const LCFS_MOUNT_FLAGS_TRY_VERITY: u32 = 1 << 4; + +fn io_error_to_errno(e: &std::io::Error) -> c_int { + e.raw_os_error().unwrap_or(libc::EINVAL) +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_mount_image( + path: *const c_char, + mountpoint: *const c_char, + options: *mut LcfsMountOptions, +) -> c_int { + if path.is_null() || mountpoint.is_null() { + set_errno(libc::EINVAL); + return -1; + } + + unsafe { + let path_cstr = CStr::from_ptr(path); + + let image_fd = match open(path_cstr, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty()) { + Ok(fd) => fd, + Err(e) => { + set_errno(e.raw_os_error()); + return -1; + } + }; + + let raw_fd = 
rustix::fd::IntoRawFd::into_raw_fd(image_fd); + let result = lcfs_mount_fd(raw_fd, mountpoint, options); + libc::close(raw_fd); + result + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_mount_fd( + fd: c_int, + mountpoint: *const c_char, + options: *mut LcfsMountOptions, +) -> c_int { + if fd < 0 || mountpoint.is_null() { + set_errno(libc::EINVAL); + return -1; + } + + unsafe { + let mountpoint_cstr = CStr::from_ptr(mountpoint); + + let dup_fd = libc::dup(fd); + if dup_fd < 0 { + return -1; + } + let image_fd = OwnedFd::from_raw_fd(dup_fd); + + let erofs_fd = match composefs::mount::erofs_mount(image_fd) { + Ok(fd) => fd, + Err(e) => { + set_errno(io_error_to_errno(&e)); + return -1; + } + }; + + let mut basedirs: Vec = Vec::new(); + if !options.is_null() { + let opts = &*options; + if !opts.objdirs.is_null() && opts.n_objdirs > 0 { + for i in 0..opts.n_objdirs { + let dir_ptr = *opts.objdirs.add(i); + if !dir_ptr.is_null() { + basedirs.push(CStr::from_ptr(dir_ptr).to_owned()); + } + } + } + } + + let verity = if !options.is_null() { + let opts = &*options; + if (opts.flags & LCFS_MOUNT_FLAGS_REQUIRE_VERITY) != 0 { + composefs::mount::VerityRequirement::Required + } else if (opts.flags & LCFS_MOUNT_FLAGS_TRY_VERITY) != 0 { + composefs::mount::VerityRequirement::Try + } else { + composefs::mount::VerityRequirement::Disabled + } + } else { + composefs::mount::VerityRequirement::Disabled + }; + + if !basedirs.is_empty() { + let mut basedir_fds: Vec = Vec::new(); + for dir in &basedirs { + match open( + dir.as_c_str(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) { + Ok(fd) => basedir_fds.push(fd), + Err(e) => { + set_errno(e.raw_os_error()); + return -1; + } + } + } + + let borrowed: Vec<_> = basedir_fds.iter().map(|fd| fd.as_fd()).collect(); + let mut mount_options = composefs::mount::MountOptions::default(); + + if !options.is_null() { + let opts = &*options; + if (opts.flags & LCFS_MOUNT_FLAGS_IDMAP) != 0 && 
opts.idmap_fd >= 0 { + let dup_idmap = libc::dup(opts.idmap_fd); + if dup_idmap < 0 { + return -1; + } + mount_options.set_idmap(OwnedFd::from_raw_fd(dup_idmap)); + } + } + + match composefs::mount::composefs_fsmount( + erofs_fd, + "composefs", + &borrowed, + verity, + &mount_options, + ) { + Ok(fs_fd) => { + if let Err(e) = composefs::mount::mount_at(&fs_fd, CWD, mountpoint_cstr) { + set_errno(e.raw_os_error()); + return -1; + } + } + Err(e) => { + set_errno(io_error_to_errno(&e)); + return -1; + } + } + } else { + if let Err(e) = composefs::mount::mount_at(&erofs_fd, CWD, mountpoint_cstr) { + set_errno(e.raw_os_error()); + return -1; + } + } + + 0 + } +} diff --git a/crates/composefs-capi/src/node.rs b/crates/composefs-capi/src/node.rs new file mode 100644 index 00000000..05ab7fb4 --- /dev/null +++ b/crates/composefs-capi/src/node.rs @@ -0,0 +1,809 @@ +use std::ffi::{CStr, CString, c_char, c_int}; +use std::ptr; + +use libc::{self, size_t, timespec}; + +use crate::errno::set_errno; +use crate::{FfiNode, FfiXattr, LCFS_DIGEST_SIZE}; + +// EROFS xattr on-disk overhead constants, matching the C library. 
+const LCFS_INODE_XATTRMETA_SIZE: usize = 4; +const LCFS_XATTR_HEADER_SIZE: usize = 12; +const LCFS_INODE_EXTERNAL_XATTR_MAX: usize = u16::MAX as usize / 2; // 32767 +const XATTR_NAME_MAX: usize = 255; + +// --------------------------------------------------------------------------- +// Node lifecycle +// --------------------------------------------------------------------------- + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_new() -> *mut FfiNode { + let node = Box::new(FfiNode::default()); + Box::into_raw(node) +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_ref(node: *mut FfiNode) -> *mut FfiNode { + if node.is_null() { + return ptr::null_mut(); + } + unsafe { + (*node).ref_count += 1; + } + node +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_unref(node: *mut FfiNode) { + if node.is_null() { + return; + } + unsafe { + (*node).ref_count -= 1; + if (*node).ref_count > 0 { + return; + } + + // Unref all children + for i in 0..(*node).children_size { + let child = *(*node).children.add(i); + (*child).parent = ptr::null_mut(); + lcfs_node_unref(child); + } + (*node).children_size = 0; + + if !(*node).link_to.is_null() { + let target = (*node).link_to; + (*node).link_to = ptr::null_mut(); + lcfs_node_unref(target); + } + + drop(Box::from_raw(node)); + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_clone(node: *mut FfiNode) -> *mut FfiNode { + if node.is_null() { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + + unsafe { + let src = &*node; + let mut cloned = FfiNode::default(); + + cloned.inode.st_mode = src.inode.st_mode; + cloned.inode.st_nlink = src.inode.st_nlink; + cloned.inode.st_uid = src.inode.st_uid; + cloned.inode.st_gid = src.inode.st_gid; + cloned.inode.st_rdev = src.inode.st_rdev; + cloned.inode.st_size = src.inode.st_size; + cloned.inode.st_mtim_sec = src.inode.st_mtim_sec; + cloned.inode.st_mtim_nsec = src.inode.st_mtim_nsec; + cloned.xattr_size = src.xattr_size; + cloned.digest = 
src.digest; + cloned.digest_set = src.digest_set; + + // Deep-copy payload + if !src.payload.is_null() { + cloned.payload = CString::new(CStr::from_ptr(src.payload).to_bytes()) + .unwrap() + .into_raw(); + } + + // Deep-copy content + if !src.content.is_null() && src.inode.st_size > 0 { + let data = std::slice::from_raw_parts(src.content, src.inode.st_size as usize); + cloned.set_content_buf(data); + } + + // Deep-copy xattrs + if !src.xattrs.is_null() && src.n_xattrs > 0 { + let src_xattrs = std::slice::from_raw_parts(src.xattrs, src.n_xattrs); + let cloned_xattrs: Vec = src_xattrs + .iter() + .map(|x| FfiXattr::new(x.key_cstr(), x.value_bytes())) + .collect(); + cloned.xattrs_put_back(cloned_xattrs); + } + + // Clone link_to + if !src.link_to.is_null() { + cloned.link_to = lcfs_node_ref(src.link_to); + } + + Box::into_raw(Box::new(cloned)) + } +} + +/// Mapping of (old node pointer -> new cloned node pointer) used during deep clone +/// to rewrite hardlink targets after cloning. +struct CloneMapping { + entries: Vec<(*mut FfiNode, *mut FfiNode)>, +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_clone_deep(node: *mut FfiNode) -> *mut FfiNode { + if node.is_null() { + set_errno(libc::EINVAL); + return ptr::null_mut(); + } + + let mut mapping = CloneMapping { + entries: Vec::new(), + }; + + unsafe { + let cloned = clone_deep_inner(node, &mut mapping); + if !cloned.is_null() { + clone_rewrite_links(cloned, &mapping); + } + cloned + } +} + +unsafe fn clone_deep_inner(node: *mut FfiNode, mapping: &mut CloneMapping) -> *mut FfiNode { + unsafe { + let cloned = lcfs_node_clone(node); + if cloned.is_null() { + return ptr::null_mut(); + } + + mapping.entries.push((node, cloned)); + + // Deep-clone all children + for &child_ptr in (*node).children_slice() { + let child_clone = clone_deep_inner(child_ptr, mapping); + if child_clone.is_null() { + lcfs_node_unref(cloned); + return ptr::null_mut(); + } + let child_name = (*child_ptr).name as *const c_char; + if 
lcfs_node_add_child(cloned, child_clone, child_name) < 0 { + lcfs_node_unref(child_clone); + lcfs_node_unref(cloned); + return ptr::null_mut(); + } + } + + cloned + } +} + +/// Walk the cloned tree and rewrite any link_to pointers that refer to +/// nodes in the old tree so they point to the corresponding cloned nodes. +unsafe fn clone_rewrite_links(node: *mut FfiNode, mapping: &CloneMapping) { + unsafe { + for &child in (*node).children_slice() { + clone_rewrite_links(child, mapping); + } + + if !(*node).link_to.is_null() { + let old_target = (*node).link_to; + for &(old, new) in &mapping.entries { + if old == old_target { + lcfs_node_unref(old_target); + (*node).link_to = lcfs_node_ref(new); + break; + } + } + } + } +} + +// --------------------------------------------------------------------------- +// Metadata getters/setters +// --------------------------------------------------------------------------- + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_dirp(node: *mut FfiNode) -> bool { + if node.is_null() { + return false; + } + unsafe { ((*node).inode.st_mode & libc::S_IFMT) == libc::S_IFDIR } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_mode(node: *mut FfiNode) -> u32 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_mode } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_mode(node: *mut FfiNode, mode: u32) { + if node.is_null() { + return; + } + unsafe { + (*node).inode.st_mode = mode; + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_try_set_mode(node: *mut FfiNode, mode: u32) -> c_int { + if node.is_null() { + set_errno(libc::EINVAL); + return -1; + } + let file_type = mode & libc::S_IFMT; + if file_type != libc::S_IFREG + && file_type != libc::S_IFDIR + && file_type != libc::S_IFCHR + && file_type != libc::S_IFBLK + && file_type != libc::S_IFIFO + && file_type != libc::S_IFLNK + && file_type != libc::S_IFSOCK + { + set_errno(libc::EINVAL); + return -1; + } + unsafe { + 
(*node).inode.st_mode = mode; + } + 0 +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_uid(node: *mut FfiNode) -> u32 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_uid } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_uid(node: *mut FfiNode, uid: u32) { + if !node.is_null() { + unsafe { + (*node).inode.st_uid = uid; + } + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_gid(node: *mut FfiNode) -> u32 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_gid } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_gid(node: *mut FfiNode, gid: u32) { + if !node.is_null() { + unsafe { + (*node).inode.st_gid = gid; + } + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_nlink(node: *mut FfiNode) -> u32 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_nlink } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_nlink(node: *mut FfiNode, nlink: u32) { + if !node.is_null() { + unsafe { + (*node).inode.st_nlink = nlink; + } + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_size(node: *mut FfiNode) -> u64 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_size } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_size(node: *mut FfiNode, size: u64) { + if !node.is_null() { + unsafe { + if (*node).inode.st_size != size { + (*node).free_content(); + } + (*node).inode.st_size = size; + } + } +} + +#[unsafe(no_mangle)] +#[deprecated] +pub unsafe extern "C" fn lcfs_node_get_rdev(node: *mut FfiNode) -> u32 { + if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_rdev } +} + +#[unsafe(no_mangle)] +#[deprecated] +pub unsafe extern "C" fn lcfs_node_set_rdev(node: *mut FfiNode, rdev: u32) { + if !node.is_null() { + unsafe { + (*node).inode.st_rdev = rdev; + } + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_rdev64(node: *mut FfiNode) -> u64 { + 
if node.is_null() { + return 0; + } + unsafe { (*node).inode.st_rdev as u64 } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_rdev64(node: *mut FfiNode, rdev: u64) { + if !node.is_null() { + unsafe { + (*node).inode.st_rdev = rdev as u32; + } + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_mtime(node: *mut FfiNode, time: *mut timespec) { + if node.is_null() || time.is_null() { + return; + } + unsafe { + (*time).tv_sec = (*node).inode.st_mtim_sec; + (*time).tv_nsec = (*node).inode.st_mtim_nsec as i64; + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_set_mtime(node: *mut FfiNode, time: *mut timespec) { + if node.is_null() || time.is_null() { + return; + } + unsafe { + (*node).inode.st_mtim_sec = (*time).tv_sec; + (*node).inode.st_mtim_nsec = (*time).tv_nsec as u32; + } +} + +// --------------------------------------------------------------------------- +// Extended attributes +// --------------------------------------------------------------------------- + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn lcfs_node_get_xattr( + node: *mut FfiNode, + name: *const c_char, + length: *mut size_t, +) -> *const c_char { + if node.is_null() || name.is_null() { + return ptr::null(); + } + unsafe { + let name_cstr = CStr::from_ptr(name); + for xattr in (*node).xattrs_slice() { + if xattr.key_cstr() == name_cstr { + if !length.is_null() { + *length = xattr.value_len as usize; + } + return xattr.value as *const c_char; + } + } + } + ptr::null() +} + +/// Compute the EROFS on-disk overhead for an xattr entry. 
+///
+/// Every entry is charged `2 * LCFS_INODE_XATTRMETA_SIZE - 1` bytes plus the
+/// name and value lengths. When `is_first` is true the one-off
+/// `LCFS_XATTR_HEADER_SIZE` is charged as well.
+fn xattr_entry_size(namelen: usize, value_len: usize, is_first: bool) -> usize {
+    let mut size = (2 * LCFS_INODE_XATTRMETA_SIZE) - 1 + namelen + value_len;
+    if is_first {
+        size += LCFS_XATTR_HEADER_SIZE;
+    }
+    size
+}
+
+/// Set (insert or replace) the xattr `name` on `node`.
+///
+/// A NULL `value` stores an empty value. The node's running `xattr_size`
+/// is kept in step with the bytes actually stored.
+///
+/// Returns 0 on success or -1 with `errno` set:
+/// * `EINVAL` - `node` or `name` is NULL, or `value_len` exceeds `u16::MAX`;
+/// * `ERANGE` - the name is empty or longer than `XATTR_NAME_MAX`, or the
+///   node's accumulated xattr size would exceed
+///   `LCFS_INODE_EXTERNAL_XATTR_MAX`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_xattr(
+    node: *mut FfiNode,
+    name: *const c_char,
+    value: *const c_char,
+    value_len: size_t,
+) -> c_int {
+    if node.is_null() || name.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+    unsafe {
+        let name_cstr = CStr::from_ptr(name);
+        let namelen = name_cstr.to_bytes().len();
+
+        if namelen == 0 || namelen > XATTR_NAME_MAX {
+            set_errno(libc::ERANGE);
+            return -1;
+        }
+
+        if value_len > u16::MAX as usize {
+            set_errno(libc::EINVAL);
+            return -1;
+        }
+
+        let val_slice: &[u8] = if value.is_null() {
+            &[]
+        } else {
+            std::slice::from_raw_parts(value as *const u8, value_len)
+        };
+
+        // Update existing — adjust tracked xattr_size for the value change
+        let is_only = (*node).n_xattrs == 1;
+        for xattr in (*node).xattrs_slice_mut() {
+            if xattr.key_cstr() == name_cstr {
+                let old_entry = xattr_entry_size(namelen, xattr.value_len as usize, is_only);
+                let new_entry = xattr_entry_size(namelen, val_slice.len(), is_only);
+                // Saturate like lcfs_node_unset_xattr does, so inconsistent
+                // bookkeeping can never underflow (and panic across the FFI
+                // boundary in debug builds).
+                let new_total = (*node).xattr_size.saturating_sub(old_entry) + new_entry;
+                if new_total > LCFS_INODE_EXTERNAL_XATTR_MAX {
+                    set_errno(libc::ERANGE);
+                    return -1;
+                }
+                (*node).xattr_size = new_total;
+                xattr.set_value(val_slice);
+                return 0;
+            }
+        }
+
+        // Inserting new — check cumulative size limit.
+        // Charge the length that is really stored: with a NULL `value` that
+        // is 0 regardless of `value_len`, and it is what unset/update will
+        // subtract again later.
+        let is_first = (*node).n_xattrs == 0;
+        let entry_size = xattr_entry_size(namelen, val_slice.len(), is_first);
+        if (*node).xattr_size + entry_size > LCFS_INODE_EXTERNAL_XATTR_MAX {
+            set_errno(libc::ERANGE);
+            return -1;
+        }
+
+        (*node).xattr_size += entry_size;
+        let new_xattr = FfiXattr::new(name_cstr, val_slice);
+        let mut xattrs = (*node).xattrs_as_vec();
+        xattrs.push(new_xattr);
+        (*node).xattrs_put_back(xattrs);
+    }
+    0
+}
+
+/// Remove the xattr `name` from `node`.
+///
+/// Returns 0 on success, -1 with `errno = EINVAL` for NULL arguments, or
+/// -1 with `errno = ENODATA` when the attribute does not exist.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_unset_xattr(node: *mut FfiNode, name: *const c_char) -> c_int {
+    if node.is_null() || name.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+    unsafe {
+        let name_cstr = CStr::from_ptr(name);
+        let pos = (*node)
+            .xattrs_slice()
+            .iter()
+            .position(|x| x.key_cstr() == name_cstr);
+        match pos {
+            Some(idx) => {
+                let mut xattrs = (*node).xattrs_as_vec();
+                let removed = xattrs.remove(idx);
+                let namelen = removed.key_cstr().to_bytes().len();
+                // The header was charged once when the first xattr was
+                // inserted; give it back when the last one goes away.
+                let was_last = xattrs.is_empty();
+                let entry_size = xattr_entry_size(namelen, removed.value_len as usize, was_last);
+                (*node).xattr_size = (*node).xattr_size.saturating_sub(entry_size);
+                drop(removed);
+                (*node).xattrs_put_back(xattrs);
+            }
+            None => {
+                set_errno(libc::ENODATA);
+                return -1;
+            }
+        }
+    }
+    0
+}
+
+/// Return the number of xattrs on `node`, or 0 for NULL.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_n_xattr(node: *mut FfiNode) -> size_t {
+    if node.is_null() {
+        return 0;
+    }
+    unsafe { (*node).n_xattrs }
+}
+
+/// Return a borrowed pointer to the name of the xattr at `index`.
+///
+/// Returns NULL for a NULL node or an out-of-range index.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_xattr_name(
+    node: *mut FfiNode,
+    index: size_t,
+) -> *const c_char {
+    if node.is_null() {
+        return ptr::null();
+    }
+    unsafe {
+        if index >= (*node).n_xattrs {
+            return ptr::null();
+        }
+        (*node).xattrs_slice()[index].key
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Content and payload
+// ---------------------------------------------------------------------------
+
+/// Replace the node's payload string with a copy of `payload`.
+///
+/// A NULL `payload` clears the current payload. Returns 0 on success, -1
+/// with `errno = EINVAL` for a NULL node, or -1 with `errno = ENAMETOOLONG`
+/// when the payload is `PATH_MAX` bytes or longer (the existing payload is
+/// kept in that case).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_payload(
+    node: *mut FfiNode,
+    payload: *const c_char,
+) -> c_int {
+    if node.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+    unsafe {
+        if payload.is_null() {
+            (*node).free_payload();
+        } else {
+            let cstr = CStr::from_ptr(payload);
+            if cstr.to_bytes().len() >= libc::PATH_MAX as usize {
+                set_errno(libc::ENAMETOOLONG);
+                return -1;
+            }
+            (*node).free_payload();
+            (*node).payload = cstr.to_owned().into_raw();
+        }
+    }
+    0
+}
+
+/// Set a symlink target: stores `payload` and sets the node size to the
+/// target's length in bytes.
+///
+/// Returns 0 on success, -1 with `errno = EINVAL` for a NULL node or a
+/// NULL/empty payload, or the error reported by `lcfs_node_set_payload`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_symlink_payload(
+    node: *mut FfiNode,
+    payload: *const c_char,
+) -> c_int {
+    if node.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+    unsafe {
+        if payload.is_null() || *payload == 0 {
+            set_errno(libc::EINVAL);
+            return -1;
+        }
+        let ret = lcfs_node_set_payload(node, payload);
+        if ret < 0 {
+            return ret;
+        }
+        if !(*node).payload.is_null() {
+            let new_size = CStr::from_ptr((*node).payload).to_bytes().len() as u64;
+            // The content buffer is freed using st_size as its length, so a
+            // buffer of a different length must be released BEFORE st_size
+            // changes (same rule as lcfs_node_set_size).
+            if (*node).inode.st_size != new_size {
+                (*node).free_content();
+            }
+            (*node).inode.st_size = new_size;
+        }
+    }
+    0
+}
+
+/// Return a borrowed pointer to the node's payload (may be NULL).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_payload(node: *mut FfiNode) -> *const c_char {
+    if node.is_null() {
+        return ptr::null();
+    }
+    unsafe { (*node).payload }
+}
+
+/// Replace the node's inline content with a copy of `data[..data_size]` and
+/// set the node size accordingly.
+///
+/// A NULL `data` or a zero `data_size` clears the content and sets the size
+/// to 0. Returns 0 on success or -1 with `errno = EINVAL` for a NULL node.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_content(
+    node: *mut FfiNode,
+    data: *const u8,
+    data_size: size_t,
+) -> c_int {
+    if node.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+    unsafe {
+        if data.is_null() || data_size == 0 {
+            (*node).free_content();
+            (*node).inode.st_size = 0;
+        } else {
+            let content_slice = std::slice::from_raw_parts(data, data_size);
+            // Order matters: set_content_buf releases the previous buffer
+            // using the CURRENT st_size as its length. Updating st_size
+            // first would free the old allocation with the wrong size.
+            (*node).set_content_buf(content_slice);
+            (*node).inode.st_size = data_size as u64;
+        }
+    }
+    0
+}
+
+/// Return a borrowed pointer to the node's inline content (may be NULL).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_content(node: *mut FfiNode) -> *const u8 {
+    if node.is_null() {
+        return ptr::null();
+    }
+    unsafe { (*node).content }
+}
+
+// ---------------------------------------------------------------------------
+// Tree structure
+// ---------------------------------------------------------------------------
+
+/// Return a borrowed pointer to the node's name (NULL when it has none).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_name(node: *mut FfiNode) -> *const c_char {
+    if node.is_null() {
+        return ptr::null();
+    }
+    unsafe { (*node).name }
+}
+
+/// Return the node's parent pointer (NULL when detached).
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_parent(node: *mut FfiNode) -> *mut FfiNode {
+    if node.is_null() {
+        return ptr::null_mut();
+    }
+    unsafe { (*node).parent }
+}
+
+/// Return the number of children of `node`, or 0 for NULL.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_n_children(node: *mut FfiNode) -> size_t {
+    if node.is_null() {
+        return 0;
+    }
+    unsafe { (*node).children_size }
+}
+
+/// Return the `i`-th child (children are kept sorted by name).
+///
+/// Returns NULL for a NULL node or an out-of-range index. No reference is
+/// taken on the returned child.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_child(node: *mut FfiNode, i: size_t) -> *mut FfiNode {
+    if node.is_null() {
+        return ptr::null_mut();
+    }
+    unsafe {
+        if i >= (*node).children_size {
+            return ptr::null_mut();
+        }
+        (*node).children_slice()[i]
+    }
+}
+
+/// Find the direct child called `name`.
+///
+/// Returns NULL for NULL arguments or when no child has that name. No
+/// reference is taken on the returned child.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_lookup_child(
+    node: *mut FfiNode,
+    name: *const c_char,
+) -> *mut FfiNode {
+    if node.is_null() || name.is_null() {
+        return ptr::null_mut();
+    }
+    unsafe {
+        let name_cstr = CStr::from_ptr(name);
+        for &child_ptr in (*node).children_slice() {
+            if !(*child_ptr).name.is_null() && CStr::from_ptr((*child_ptr).name) == name_cstr {
+                return child_ptr;
+            }
+        }
+    }
+    ptr::null_mut()
+}
+
+/// Attach `child` to the directory `parent` under `name`.
+///
+/// The child's reference count is not changed: the parent takes over the
+/// caller's reference. The child is inserted so that the children array
+/// stays sorted by name bytes.
+///
+/// Returns 0 on success or -1 with `errno` set:
+/// * `EINVAL` - a NULL argument or an empty name;
+/// * `ENOTDIR` - `parent` is not a directory;
+/// * `ENAMETOOLONG` - the name is longer than 255 bytes;
+/// * `EMLINK` - `child` already has a name (it is already in a tree);
+/// * `EEXIST` - `parent` already has a child with that name.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_add_child(
+    parent: *mut FfiNode,
+    child: *mut FfiNode,
+    name: *const c_char,
+) -> c_int {
+    if parent.is_null() || child.is_null() || name.is_null() {
+        set_errno(libc::EINVAL);
+        return -1;
+    }
+
+    unsafe {
+        if ((*parent).inode.st_mode & libc::S_IFMT) != libc::S_IFDIR {
+            set_errno(libc::ENOTDIR);
+            return -1;
+        }
+
+        let name_cstr = CStr::from_ptr(name);
+        let name_bytes = name_cstr.to_bytes();
+
+        if name_bytes.is_empty() {
+            set_errno(libc::EINVAL);
+            return -1;
+        }
+
+        if name_bytes.len() > 255 {
+            set_errno(libc::ENAMETOOLONG);
+            return -1;
+        }
+
+        // Child already has a name (already in a tree)
+        if !(*child).name.is_null() {
+            set_errno(libc::EMLINK);
+            return -1;
+        }
+
+        // Check for duplicate name
+        for &existing in (*parent).children_slice() {
+            if !(*existing).name.is_null() && CStr::from_ptr((*existing).name) == name_cstr {
+                set_errno(libc::EEXIST);
+                return -1;
+            }
+        }
+
+        // Set name and parent on child
+        (*child).name = name_cstr.to_owned().into_raw();
+        (*child).parent = parent;
+
+        // Insert sorted by name
+        let mut children = (*parent).children_as_vec();
+        let insert_pos = children
+            .binary_search_by(|probe| {
+                let probe_name = CStr::from_ptr((**probe).name);
+                probe_name.to_bytes().cmp(name_bytes)
+            })
+            .unwrap_or_else(|pos| pos);
+        children.insert(insert_pos, child);
+        (*parent).children_put_back(children);
+    }
+    0
+}
+
+// ---------------------------------------------------------------------------
+// Hardlinks
+// ---------------------------------------------------------------------------
+
+/// Make `node` a hardlink to `target`, replacing any previous target.
+///
+/// Takes a reference on `target` and drops the one held on the previous
+/// target. Does nothing when either pointer is NULL.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_make_hardlink(node: *mut FfiNode, target: *mut FfiNode) {
+    if node.is_null() || target.is_null() {
+        return;
+    }
+    unsafe {
+        // Reference the new target BEFORE releasing the old one: if both
+        // are the same node and the link held its last reference, releasing
+        // first would free the node we are about to link to.
+        let new_target = lcfs_node_ref(target);
+        let old = (*node).link_to;
+        (*node).link_to = new_target;
+        if !old.is_null() {
+            lcfs_node_unref(old);
+        }
+    }
+}
+
+/// Return the node's hardlink target (NULL when it is not a hardlink).
+///
+/// No reference is taken on the returned node.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_hardlink_target(node: *mut FfiNode) -> *mut FfiNode {
+    if node.is_null() {
+        return ptr::null_mut();
+    }
+    unsafe { (*node).link_to }
+}
+
+// ---------------------------------------------------------------------------
+// fs-verity digest on node
+// ---------------------------------------------------------------------------
+
+/// Return a borrowed pointer to the node's `LCFS_DIGEST_SIZE`-byte digest.
+///
+/// Returns NULL for a NULL node or when no digest has been set.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_get_fsverity_digest(node: *mut FfiNode) -> *const u8 {
+    if node.is_null() {
+        return ptr::null();
+    }
+    unsafe {
+        if (*node).digest_set {
+            (*node).digest.as_ptr()
+        } else {
+            ptr::null()
+        }
+    }
+}
+
+/// Copy `LCFS_DIGEST_SIZE` bytes from `digest` into the node and mark the
+/// digest as set.
+///
+/// Does nothing when either pointer is NULL. `digest` must point to at
+/// least `LCFS_DIGEST_SIZE` readable bytes.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lcfs_node_set_fsverity_digest(node: *mut FfiNode, digest: *const u8) {
+    if node.is_null() || digest.is_null() {
+        return;
+    }
+    unsafe {
+        (*node)
+            .digest
+            .copy_from_slice(std::slice::from_raw_parts(digest, LCFS_DIGEST_SIZE));
+        (*node).digest_set = true;
+    }
+}
diff --git a/crates/composefs-capi/src/tests.rs b/crates/composefs-capi/src/tests.rs
new file mode 100644
index 00000000..c2b20f98
--- /dev/null
+++
b/crates/composefs-capi/src/tests.rs
@@ -0,0 +1,394 @@
+use std::ffi::CString;
+use std::io::{Seek, Write};
+use std::os::fd::{AsRawFd, FromRawFd};
+use std::ptr;
+
+use crate::node;
+
+// Entry points of the C test cases defined in
+// crates/composefs-capi/tests/test_lcfs.c.
+// NOTE(review): presumably compiled and linked into the test binary by a
+// build script that is not visible here; confirm.
+unsafe extern "C" {
+    fn test_basic();
+    fn test_xattr_addremove();
+    fn test_xattr_doubleadd();
+    fn test_add_uninitialized_child();
+    fn test_hardlinked_whiteout_load();
+    fn test_no_verity();
+}
+
+// Each wrapper below runs one C test case as a Rust #[test]; the C side
+// reports failure through assert().
+#[test]
+fn c_test_basic() {
+    unsafe { test_basic() };
+}
+
+#[test]
+fn c_test_xattr_addremove() {
+    unsafe { test_xattr_addremove() };
+}
+
+#[test]
+fn c_test_xattr_doubleadd() {
+    unsafe { test_xattr_doubleadd() };
+}
+
+#[test]
+fn c_test_add_uninitialized_child() {
+    unsafe { test_add_uninitialized_child() };
+}
+
+#[test]
+fn c_test_hardlinked_whiteout_load() {
+    unsafe { test_hardlinked_whiteout_load() };
+}
+
+#[test]
+fn c_test_no_verity() {
+    unsafe { test_no_verity() };
+}
+
+// -----------------------------------------------------------------------
+// Tests for bugs found in cross-reference review vs C libcomposefs
+// -----------------------------------------------------------------------
+
+/// Bug 5: lcfs_node_new must initialize nlink to 1 (not 0).
+/// The C library sets st_nlink = 1 in lcfs_node_new().
+#[test]
+fn test_nlink_default_is_one() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        assert!(!n.is_null());
+        assert_eq!(node::lcfs_node_get_nlink(n), 1, "nlink must default to 1");
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 3: lcfs_node_set_symlink_payload must update node size to payload length.
+/// The C library sets node->inode.st_size = strlen(node->payload).
+#[test]
+fn test_symlink_payload_updates_size() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        node::lcfs_node_set_mode(n, libc::S_IFLNK | 0o777);
+
+        let target = CString::new("/usr/bin/bash").unwrap();
+        let ret = node::lcfs_node_set_symlink_payload(n, target.as_ptr());
+        assert_eq!(ret, 0);
+        // "/usr/bin/bash" is 13 bytes long (no trailing NUL counted).
+        assert_eq!(
+            node::lcfs_node_get_size(n),
+            13,
+            "size must equal symlink target length"
+        );
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 3: lcfs_node_set_symlink_payload must reject NULL payload.
+#[test]
+fn test_symlink_payload_rejects_null() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        node::lcfs_node_set_mode(n, libc::S_IFLNK | 0o777);
+
+        // A NULL target pointer must be reported as an error, not dereferenced.
+        let ret = node::lcfs_node_set_symlink_payload(n, ptr::null());
+        assert_eq!(ret, -1, "NULL symlink target must fail");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 3: lcfs_node_set_symlink_payload must reject empty string payload.
+#[test]
+fn test_symlink_payload_rejects_empty() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        node::lcfs_node_set_mode(n, libc::S_IFLNK | 0o777);
+
+        // Valid pointer, zero-length string.
+        let empty = CString::new("").unwrap();
+        let ret = node::lcfs_node_set_symlink_payload(n, empty.as_ptr());
+        assert_eq!(ret, -1, "empty symlink target must fail");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 7: lcfs_node_set_payload must reject payloads >= PATH_MAX.
+/// The C library checks strlen(payload) >= PATH_MAX and returns ENAMETOOLONG.
+#[test]
+fn test_payload_rejects_too_long() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        node::lcfs_node_set_mode(n, libc::S_IFREG | 0o644);
+
+        // PATH_MAX is typically 4096
+        // Exactly PATH_MAX bytes: the smallest length that must be rejected.
+        let long_path = "x".repeat(libc::PATH_MAX as usize);
+        let payload = CString::new(long_path).unwrap();
+        let ret = node::lcfs_node_set_payload(n, payload.as_ptr());
+        assert_eq!(ret, -1, "payload >= PATH_MAX must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::ENAMETOOLONG, "errno must be ENAMETOOLONG");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 7: Payload just under PATH_MAX should succeed.
+#[test]
+fn test_payload_under_path_max_succeeds() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        node::lcfs_node_set_mode(n, libc::S_IFREG | 0o644);
+
+        // PATH_MAX - 1 bytes: the largest length that must be accepted.
+        let ok_path = "x".repeat(libc::PATH_MAX as usize - 1);
+        let payload = CString::new(ok_path).unwrap();
+        let ret = node::lcfs_node_set_payload(n, payload.as_ptr());
+        assert_eq!(ret, 0, "payload < PATH_MAX must succeed");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 8: lcfs_node_set_xattr must reject empty xattr names.
+/// The C library checks namelen == 0 and returns ERANGE.
+#[test]
+fn test_xattr_rejects_empty_name() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        let empty_name = CString::new("").unwrap();
+        let value = CString::new("val").unwrap();
+        // The last argument is the value length: "val" is 3 bytes.
+        let ret = node::lcfs_node_set_xattr(n, empty_name.as_ptr(), value.as_ptr(), 3);
+        assert_eq!(ret, -1, "empty xattr name must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::ERANGE, "errno must be ERANGE");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 8: lcfs_node_set_xattr must reject xattr names > XATTR_NAME_MAX (255).
+#[test]
+fn test_xattr_rejects_long_name() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        // 256 bytes: one more than the 255-byte name limit.
+        let long_name = "x".repeat(256);
+        let name = CString::new(long_name).unwrap();
+        let value = CString::new("val").unwrap();
+        let ret = node::lcfs_node_set_xattr(n, name.as_ptr(), value.as_ptr(), 3);
+        assert_eq!(ret, -1, "xattr name > 255 must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::ERANGE, "errno must be ERANGE");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 8: lcfs_node_set_xattr must reject value_len > UINT16_MAX.
+#[test]
+fn test_xattr_rejects_huge_value() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        let name = CString::new("user.test").unwrap();
+        // One byte more than the largest length a u16 can express.
+        let too_big = u16::MAX as usize + 1;
+        let data = vec![0u8; too_big];
+        let ret = node::lcfs_node_set_xattr(n, name.as_ptr(), data.as_ptr() as *const _, too_big);
+        assert_eq!(ret, -1, "xattr value > u16::MAX must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::EINVAL, "errno must be EINVAL");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 8 (continued): replacing an existing xattr with a huge value must also
+/// be checked against the cumulative limit, not just new insertions.
+#[test]
+fn test_xattr_replacement_checks_limit() {
+    unsafe {
+        let n = node::lcfs_node_new();
+        // Insert a small xattr first
+        let name = CString::new("user.test").unwrap();
+        let small = CString::new("x").unwrap();
+        let ret = node::lcfs_node_set_xattr(n, name.as_ptr(), small.as_ptr(), 1);
+        assert_eq!(ret, 0, "initial small xattr must succeed");
+
+        // Now try to replace it with a value below u16::MAX (so it passes the
+        // per-value limit) that is still expected to exceed the cumulative
+        // EROFS limit.
+        let big_len: usize = 60000; // > LCFS_INODE_EXTERNAL_XATTR_MAX (32767)
+        let big = vec![b'A'; big_len];
+        let ret = node::lcfs_node_set_xattr(n, name.as_ptr(), big.as_ptr() as *const _, big_len);
+        assert_eq!(ret, -1, "replacing with huge value must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::ERANGE, "errno must be ERANGE");
+
+        node::lcfs_node_unref(n);
+    }
+}
+
+/// Bug 4: lcfs_compute_fsverity_from_fd must hash from current offset,
+/// not seek to the beginning. The C library documents: "the computation
+/// starts from the current offset position of the file."
+#[test]
+fn test_fsverity_from_fd_respects_offset() {
+    use crate::fsverity;
+
+    // Create an anonymous temp file via memfd
+    let name = CString::new("test-fsverity").unwrap();
+    let fd = unsafe { libc::memfd_create(name.as_ptr(), 0) };
+    assert!(fd >= 0, "memfd_create failed");
+
+    // Write known content: "AAABBB".
+    // `file` owns the descriptor from here on and closes it when dropped,
+    // including when one of the assertions below panics.
+    let mut file = unsafe { std::fs::File::from_raw_fd(fd) };
+    file.write_all(b"AAABBB").unwrap();
+
+    // Compute expected hash of just "BBB" (the tail after seeking past 3 bytes).
+    // The FFI signature takes a mutable pointer, so hand it a real mutable
+    // buffer instead of casting away the constness of a byte-string literal.
+    let mut tail = *b"BBB";
+    let mut expected_digest = [0u8; 32];
+    unsafe {
+        fsverity::lcfs_compute_fsverity_from_data(
+            expected_digest.as_mut_ptr(),
+            tail.as_mut_ptr(),
+            tail.len(),
+        );
+    }
+
+    // Seek to offset 3, then compute from fd — must hash only remaining bytes
+    file.seek(std::io::SeekFrom::Start(3)).unwrap();
+    let mut actual_digest = [0u8; 32];
+    unsafe {
+        // The raw fd is only borrowed for the duration of the call.
+        let ret =
+            fsverity::lcfs_compute_fsverity_from_fd(actual_digest.as_mut_ptr(), file.as_raw_fd());
+        assert_eq!(ret, 0);
+    }
+
+    assert_eq!(
+        actual_digest, expected_digest,
+        "fsverity from fd at offset 3 must hash only the remaining bytes"
+    );
+}
+
+/// Bug 6: lcfs_write_to must reject version > LCFS_VERSION_MAX (1).
+#[test]
+fn test_write_to_rejects_invalid_version() {
+    use crate::image::{LcfsWriteOptions, lcfs_write_to};
+
+    unsafe {
+        let root = node::lcfs_node_new();
+        node::lcfs_node_set_mode(root, libc::S_IFDIR | 0o755);
+
+        // All other option fields stay zeroed; only the version fields matter
+        // for this check.
+        let mut options: LcfsWriteOptions = std::mem::zeroed();
+        options.version = 99; // invalid
+        options.max_version = 99;
+
+        let ret = lcfs_write_to(root, &mut options);
+        assert_eq!(ret, -1, "version > 1 must fail");
+
+        // Read errno before the unref below, which could overwrite it.
+        let errno_val = *libc::__errno_location();
+        assert_eq!(errno_val, libc::EINVAL, "errno must be EINVAL");
+
+        node::lcfs_node_unref(root);
+    }
+}
+
+/// Bug 6: lcfs_write_to must clamp max_version up to version.
+#[test]
+fn test_write_to_clamps_max_version() {
+    use crate::image::{LcfsWriteOptions, lcfs_write_to};
+
+    unsafe extern "C" fn null_write(
+        _file: *mut std::ffi::c_void,
+        _buf: *mut std::ffi::c_void,
+        count: usize,
+    ) -> isize {
+        count as isize
+    }
+
+    unsafe {
+        let root = node::lcfs_node_new();
+        node::lcfs_node_set_mode(root, libc::S_IFDIR | 0o755);
+
+        let mut options: LcfsWriteOptions = std::mem::zeroed();
+        options.format = 1; // LCFS_FORMAT_EROFS
+        options.version = 1;
+        options.max_version = 0; // less than version
+        options.file_write_cb = Some(null_write);
+
+        let ret = lcfs_write_to(root, &mut options);
+        assert_eq!(ret, 0, "valid write should succeed");
+        assert!(
+            options.max_version >= options.version,
+            "max_version must be clamped up to at least version"
+        );
+
+        node::lcfs_node_unref(root);
+    }
+}
+
+/// Bug 1: lcfs_node_clone_deep must rewrite hardlink targets to point
+/// to nodes within the cloned tree, not the original tree.
+#[test]
+fn test_clone_deep_rewrites_hardlinks() {
+    unsafe {
+        // Build a small tree: root/
+        //   ├── file (regular)
+        //   └── link -> file (hardlink)
+        let root = node::lcfs_node_new();
+        node::lcfs_node_set_mode(root, libc::S_IFDIR | 0o755);
+
+        let file = node::lcfs_node_new();
+        node::lcfs_node_set_mode(file, libc::S_IFREG | 0o644);
+        let file_name = CString::new("file").unwrap();
+        assert_eq!(node::lcfs_node_add_child(root, file, file_name.as_ptr()), 0);
+
+        let link = node::lcfs_node_new();
+        node::lcfs_node_make_hardlink(link, file);
+        let link_name = CString::new("link").unwrap();
+        assert_eq!(node::lcfs_node_add_child(root, link, link_name.as_ptr()), 0);
+
+        // Verify original tree: link's target IS the file node
+        let orig_target = node::lcfs_node_get_hardlink_target(link);
+        assert_eq!(orig_target, file, "original link must target the file node");
+
+        // Deep-clone the tree
+        let cloned_root = node::lcfs_node_clone_deep(root);
+        assert!(!cloned_root.is_null(), "clone_deep must succeed");
+
+        // Find the cloned "file" and "link" children
+        let cloned_file_name = CString::new("file").unwrap();
+        let cloned_file = node::lcfs_node_lookup_child(cloned_root, cloned_file_name.as_ptr());
+        assert!(!cloned_file.is_null());
+
+        let cloned_link_name = CString::new("link").unwrap();
+        let cloned_link = node::lcfs_node_lookup_child(cloned_root, cloned_link_name.as_ptr());
+        assert!(!cloned_link.is_null());
+
+        // The cloned link's hardlink target must point to the CLONED file,
+        // not the original file.
+        let cloned_target = node::lcfs_node_get_hardlink_target(cloned_link);
+        assert!(
+            !cloned_target.is_null(),
+            "cloned link must still have a hardlink target"
+        );
+        assert_ne!(
+            cloned_target, file,
+            "cloned link must NOT point to the original file node"
+        );
+        assert_eq!(
+            cloned_target, cloned_file,
+            "cloned link must point to the cloned file node"
+        );
+
+        // Verify the cloned nodes are distinct from the originals
+        assert_ne!(cloned_root, root);
+        assert_ne!(cloned_file, file);
+        assert_ne!(cloned_link, link);
+
+        node::lcfs_node_unref(cloned_root);
+        node::lcfs_node_unref(root);
+    }
+}
diff --git a/crates/composefs-capi/tests/Containerfile.test-capi b/crates/composefs-capi/tests/Containerfile.test-capi
new file mode 100644
index 00000000..e0c602e0
--- /dev/null
+++ b/crates/composefs-capi/tests/Containerfile.test-capi
@@ -0,0 +1,17 @@
+FROM quay.io/fedora/fedora:latest
+RUN dnf -y install meson gcc gcc-c++ pkg-config openssl-devel erofs-utils \
+    fsverity-utils gzip diffutils coreutils findutils && dnf clean all
+COPY composefs-c /src/composefs-c
+COPY libcomposefs_capi.so /usr/lib64/
+WORKDIR /src/composefs-c
+RUN meson setup build && meson compile -C build
+RUN cp /usr/lib64/libcomposefs_capi.so build/libcomposefs/libcomposefs.so.1.4.0 && \
+    ln -sf libcomposefs.so.1.4.0 build/libcomposefs/libcomposefs.so.1 && \
+    ln -sf libcomposefs.so.1 build/libcomposefs/libcomposefs.so
+RUN export LD_LIBRARY_PATH=/src/composefs-c/build/libcomposefs && \
+    ldd build/tools/mkcomposefs | grep composefs && \
+    tests/test-checksums.sh build/tools tests/assets \
+    "config.dump.gz config-with-hard-link.dump.gz special.dump bigfile.dump bigfile-xattr.dump special_v1.dump honggfuzz-long-symlink.dump honggfuzz-longlink-unterminated.dump honggfuzz-bigfile-with-acl.dump no-newline.dump honggfuzz-chardev-nonzero-size.dump longlink.dump honggfuzz-write-inode-data.dump"
+RUN export LD_LIBRARY_PATH=/src/composefs-c/build/libcomposefs && \
+    tests/test-units.sh build/tools && \
+    echo "=== All C tests passed with Rust libcomposefs ==="
diff --git a/crates/composefs-capi/tests/test-capi-container.sh b/crates/composefs-capi/tests/test-capi-container.sh
new file mode 100755
index 00000000..7cdc3e84
--- /dev/null
+++ b/crates/composefs-capi/tests/test-capi-container.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Run the C composefs test suite (test-checksums, test-units) against the
+# Rust-built libcomposefs shared library inside a container.
+#
+# Usage: test-capi-container.sh [composefs-c-repo]
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_DIR="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+C_REPO="${1:-}"
+
+cargo build --release -p composefs-capi --manifest-path="$REPO_DIR/Cargo.toml"
+
+BUILD_CTX=$(mktemp -d)
+trap 'rm -rf "$BUILD_CTX"' EXIT
+
+cp "$REPO_DIR/target/release/libcomposefs_capi.so" "$BUILD_CTX/"
+
+if [ -n "$C_REPO" ]; then
+    cp -a "$(cd "$C_REPO" && pwd)" "$BUILD_CTX/composefs-c"
+else
+    git clone --depth=1 https://github.com/composefs/composefs.git "$BUILD_CTX/composefs-c"
+fi
+
+podman build --no-cache -f "$SCRIPT_DIR/Containerfile.test-capi" "$BUILD_CTX"
diff --git a/crates/composefs-capi/tests/test_lcfs.c b/crates/composefs-capi/tests/test_lcfs.c
new file mode 100644
index 00000000..135c006e
--- /dev/null
+++ b/crates/composefs-capi/tests/test_lcfs.c
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
+/* Adapted from composefs tests/test-lcfs.c for use as a Rust integration test */
+#define _GNU_SOURCE
+
+#include "lcfs-writer.h"
+#include "lcfs-mount.h"
+#include "lcfs-erofs.h"
+#include "private/erofs_fs_wrapper.h"
+#include <assert.h> /* NOTE(review): the six system header names were lost in extraction; */
+#include <errno.h> /* restored from the symbols this file uses - confirm against the commit */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static inline void lcfs_node_unrefp(struct lcfs_node_s **nodep)
+{
+	if (*nodep != NULL) {
+		lcfs_node_unref(*nodep);
+		*nodep = NULL;
+	}
+}
+#define cleanup_node __attribute__((cleanup(lcfs_node_unrefp)))
+
+static ssize_t write_cb(void *_file, void *buf, size_t count)
+{
+	FILE *file = _file;
+
+	return fwrite(buf, 1, count, file);
+}
+
+static int testwrite_node(struct lcfs_node_s *node)
+{
+	char *bufp = NULL;
+	size_t bufsz = 0;
+	FILE *buf = open_memstream(&bufp, &bufsz);
+
+	struct lcfs_write_options_s options = { 0 };
+	options.format = LCFS_FORMAT_EROFS;
+	options.version = 1;
+	options.max_version = 1;
+	options.file = buf;
+	options.file_write_cb = write_cb;
+
+	int r = lcfs_write_to(node, &options);
+	int saved_errno = errno;
+	fclose(buf);
+	free(bufp);
+	errno = saved_errno;
+	return r;
+}
+
+void test_basic(void)
+{
+	cleanup_node struct lcfs_node_s *node = lcfs_node_new();
+	lcfs_node_set_mode(node, S_IFDIR | 0755);
+	cleanup_node struct lcfs_node_s *child = lcfs_node_new();
+	lcfs_node_set_mode(child, S_IFDIR | 0700);
+	int r = lcfs_node_add_child(node, child, "somechild");
+	assert(r == 0);
+	// Adding child took ownership
+	child = NULL;
+	r = testwrite_node(node);
+	assert(r == 0);
+}
+
+void test_xattr_addremove(void)
+{
+	cleanup_node struct lcfs_node_s *node = lcfs_node_new();
+	lcfs_node_set_mode(node, S_IFDIR | 0755);
+	cleanup_node struct lcfs_node_s *child = lcfs_node_new();
+	lcfs_node_set_mode(child, S_IFDIR | 0700);
+	int r = lcfs_node_unset_xattr(child, "user.foo");
+	int errsv = errno;
+	assert(r == -1);
+	assert(errsv == ENODATA);
+	r = lcfs_node_set_xattr(child, "user.foo", "bar", 3);
+	assert(r == 0);
+	r = lcfs_node_unset_xattr(child, "user.foo");
+	assert(r == 0);
+	r = lcfs_node_add_child(node, child, "somechild");
+	assert(r == 0);
+	child = NULL;
+}
+
+// Test that calling lcfs_node_set_xattr multiple times
+// with the same key has last-one-wins semantics.
+void test_xattr_doubleadd(void)
+{
+	cleanup_node struct lcfs_node_s *node = lcfs_node_new();
+	lcfs_node_set_mode(node, S_IFDIR | 0755);
+	cleanup_node struct lcfs_node_s *child = lcfs_node_new();
+	lcfs_node_set_mode(child, S_IFDIR | 0700);
+	int r = lcfs_node_set_xattr(child, "user.foo", "bar", 3);
+	assert(r == 0);
+	// Should successfully silently overwrite.
+	r = lcfs_node_set_xattr(child, "user.foo", "baz", 3);
+	assert(r == 0);
+
+	size_t found_len;
+	const char *found_value = lcfs_node_get_xattr(child, "user.foo", &found_len);
+	assert(found_value);
+	assert(found_len == 3);
+	assert(memcmp(found_value, "baz", found_len) == 0);
+	r = lcfs_node_add_child(node, child, "somechild");
+	assert(r == 0);
+	child = NULL;
+}
+
+void test_add_uninitialized_child(void)
+{
+	cleanup_node struct lcfs_node_s *node = lcfs_node_new();
+	lcfs_node_set_mode(node, S_IFDIR | 0755);
+	// libostree today does this pattern of creating an empty (uninitialized)
+	// child and passing it to lcfs_node_add_child first. Verify this
+	// continues to work for the foreseeable future.
+	cleanup_node struct lcfs_node_s *child = lcfs_node_new();
+	int r = lcfs_node_add_child(node, child, "somechild");
+	assert(r == 0);
+	// Adding child took ownership
+	child = NULL;
+
+	// But we should fail to write an EROFS with this
+	r = testwrite_node(node);
+	assert(r == -1);
+	assert(errno == EINVAL);
+}
+
+/* Regression test for heap-use-after-free when loading an EROFS image that
+ * contains a hardlinked whiteout (chardev with rdev=0, nlink>1).
+ *
+ * A whiteout represents the absence of a file, so nlink>1 is semantically
+ * invalid. The loader must reject such images with EINVAL rather than
+ * silently processing them (which previously caused a use-after-free via a
+ * stale node_hash entry when the alias dirent appeared before the canonical
+ * one in the directory block).
+ *
+ * We construct a minimal EROFS image in memory rather than loading a binary
+ * fixture, so the test is self-contained.
+ */
+void test_hardlinked_whiteout_load(void)
+{
+	/*
+	 * Image layout (2 blocks = 8192 bytes, all in block 0's metadata area):
+	 *
+	 *   0x000  lcfs_erofs_header_s   (composefs header, 32 bytes)
+	 *   0x400  erofs_super_block     (EROFS superblock, 128 bytes)
+	 *   0x480  erofs_inode_compact   root dir, nid=36, 32 bytes
+	 *   0x4A0  inline dir data       3 dirents + names, 41 bytes
+	 *   0x4E0  erofs_inode_compact   whiteout, nid=39, 32 bytes
+	 */
+	uint8_t image[2 * EROFS_BLKSIZ];
+	memset(image, 0, sizeof(image));
+
+	/* Composefs header at offset 0 */
+	struct lcfs_erofs_header_s *cfs = (struct lcfs_erofs_header_s *)image;
+	cfs->magic = htole32(LCFS_EROFS_MAGIC);
+	cfs->version = htole32(LCFS_EROFS_VERSION);
+
+	/* EROFS superblock at offset 1024 */
+	struct erofs_super_block *sb =
+		(struct erofs_super_block *)(image + EROFS_SUPER_OFFSET);
+	sb->magic = htole32(EROFS_SUPER_MAGIC_V1);
+	sb->blkszbits = EROFS_BLKSIZ_BITS;
+	sb->root_nid = htole16(36); /* nid=36 → offset 36*32 = 0x480 */
+	sb->inos = htole64(2);
+	sb->blocks = htole32(2);
+	sb->meta_blkaddr = htole32(0);
+	sb->xattr_blkaddr = htole32(0);
+
+	/* Root directory inode (compact, 32 bytes) at offset 0x480, nid=36.
+	 * Data layout = FLAT_INLINE (tailpacked dir entries follow the inode). */
+	const uint16_t root_nid = 36;
+	const uint16_t wh_nid = 39; /* offset 39*32 = 0x4E0 */
+	struct erofs_inode_compact *root_ino =
+		(struct erofs_inode_compact *)(image + root_nid * EROFS_SLOTSIZE);
+	root_ino->i_format =
+		htole16((EROFS_INODE_FLAT_INLINE << EROFS_I_DATALAYOUT_BIT) |
+			(EROFS_INODE_LAYOUT_COMPACT << EROFS_I_VERSION_BIT));
+	root_ino->i_mode = htole16(S_IFDIR | 0755);
+	root_ino->i_nlink = htole16(2);
+
+	/* Build inline directory data right after the root inode (offset 0x4A0).
+	 * 3 entries: "." (self), ".." (parent), "wh" (whiteout child).
+	 * Each dirent is 12 bytes; names start at offset 3*12 = 36. */
+	uint8_t *dir = image + root_nid * EROFS_SLOTSIZE +
+		       sizeof(struct erofs_inode_compact);
+	const uint16_t names_off = 3 * sizeof(struct erofs_dirent); /* 36 */
+	/* "." at offset 36, ".." at 37, "wh" at 39 → total = 41 bytes */
+	const uint32_t dir_size = names_off + 1 + 2 + 2; /* 41 */
+	root_ino->i_size = htole32(dir_size);
+
+	struct erofs_dirent *de = (struct erofs_dirent *)dir;
+	/* dirent[0]: "." */
+	de[0].nid = htole64(root_nid);
+	de[0].nameoff = htole16(names_off);
+	de[0].file_type = EROFS_FT_DIR;
+	/* dirent[1]: ".." */
+	de[1].nid = htole64(root_nid);
+	de[1].nameoff = htole16(names_off + 1);
+	de[1].file_type = EROFS_FT_DIR;
+	/* dirent[2]: "wh" */
+	de[2].nid = htole64(wh_nid);
+	de[2].nameoff = htole16(names_off + 3);
+	de[2].file_type = EROFS_FT_CHRDEV;
+
+	memcpy(dir + names_off, ".", 1);
+	memcpy(dir + names_off + 1, "..", 2);
+	memcpy(dir + names_off + 3, "wh", 2);
+
+	/* Whiteout inode (compact, 32 bytes) at offset 0x4E0, nid=39.
+	 * chardev with rdev=0 and nlink=252 (>1 triggers EINVAL). */
+	struct erofs_inode_compact *wh_ino =
+		(struct erofs_inode_compact *)(image + wh_nid * EROFS_SLOTSIZE);
+	wh_ino->i_format =
+		htole16((EROFS_INODE_FLAT_PLAIN << EROFS_I_DATALAYOUT_BIT) |
+			(EROFS_INODE_LAYOUT_COMPACT << EROFS_I_VERSION_BIT));
+	wh_ino->i_mode = htole16(S_IFCHR);
+	wh_ino->i_nlink = htole16(252);
+	wh_ino->i_u.rdev = htole32(0); /* rdev=0 makes it a whiteout */
+
+	/* The loader must reject this image with EINVAL (hardlinked whiteout)
+	 * and must not crash (the original bug was a use-after-free). */
+	cleanup_node struct lcfs_node_s *root =
+		lcfs_load_node_from_image(image, sizeof(image));
+	int errsv = errno;
+	assert(root == NULL);
+	assert(errsv == EINVAL);
+}
+
+// Verifies that lcfs_fd_measure_fsverity fails on a fd without fsverity
+void test_no_verity(void)
+{
+	char buf[] = "/tmp/test-verity.XXXXXX";
+	int tmpfd = mkstemp(buf);
+	assert(tmpfd >= 0); /* mkstemp fails with -1; descriptor 0 is valid */
+
+	uint8_t digest[LCFS_DIGEST_SIZE];
+	int r = lcfs_fd_measure_fsverity(digest, tmpfd);
+	int errsv = errno;
+	assert(r != 0);
+	// We may get ENOSYS from qemu userspace emulation not implementing the ioctl
+	if (getenv("CFS_TEST_ARCH_EMULATION") == NULL)
+		assert(errsv == ENOVERITY);
+	close(tmpfd);
+	unlink(buf);
+}
diff --git a/crates/composefs-ctl/src/lib.rs b/crates/composefs-ctl/src/lib.rs
index f6ccd2a1..9a8c592e 100644
--- a/crates/composefs-ctl/src/lib.rs
+++ b/crates/composefs-ctl/src/lib.rs
@@ -24,6 +24,7 @@ pub use composefs_oci;
 
 pub mod composefs_info;
 pub mod mkcomposefs;
+pub mod mountcomposefs;
 /// Varlink RPC service exposing repository operations over a Unix socket.
 pub mod varlink;
 
@@ -1264,8 +1265,8 @@ fn dump_file_impl(
         let leaf = fs.leaf(*leaf_id);
         match &leaf.content {
             Regular(f) => match f {
-                Inline(..) => println!("{} inline", file_path.display()),
-                External(id, _) => {
+                Inline(..) | Sparse(..) => println!("{} inline", file_path.display()),
+                External(id, _) | ExternalNoVerity(id, _) => {
                     println!("{} {}", file_path.display(), id.to_object_pathname());
                 }
             },
diff --git a/crates/composefs-ctl/src/main.rs b/crates/composefs-ctl/src/main.rs
index 96cdbf91..942f1fc0 100644
--- a/crates/composefs-ctl/src/main.rs
+++ b/crates/composefs-ctl/src/main.rs
@@ -1,8 +1,9 @@
 //! Command-line control utility for composefs repositories and images.
 //!
-//! `cfsctl` is a multi-call binary: when invoked as `mkcomposefs` or
-//! `composefs-info` (via symlink or hardlink), it dispatches to the
-//! corresponding tool. Otherwise it runs the normal `cfsctl` interface.
+//!
`cfsctl` is a multi-call binary: when invoked as `mkcomposefs`, +//! `composefs-info`, or `mount.composefs` (via symlink or hardlink), +//! it dispatches to the corresponding tool. Otherwise it runs the normal +//! `cfsctl` interface. //! //! ## C composefs compatibility roadmap //! @@ -46,6 +47,7 @@ fn main() -> Result<()> { match binary_name().as_deref() { Some("mkcomposefs") => composefs_ctl::mkcomposefs::run(), Some("composefs-info") => composefs_ctl::composefs_info::run(), + Some("mount.composefs") => composefs_ctl::mountcomposefs::run(), // When called as `cfsctl mkcomposefs ...` or `cfsctl composefs-info ...`, // intercept before clap so that --help and all flags go to the real tool. _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("mkcomposefs")) => { @@ -54,6 +56,9 @@ fn main() -> Result<()> { _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("composefs-info")) => { composefs_ctl::composefs_info::run_from_args(rest_of_args()) } + _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("mount.composefs")) => { + composefs_ctl::mountcomposefs::run_from_args(rest_of_args()) + } _ => { // If we were spawned as a userns helper process, handle that and exit. // This MUST be called before the tokio runtime is created. diff --git a/crates/composefs-ctl/src/mountcomposefs.rs b/crates/composefs-ctl/src/mountcomposefs.rs new file mode 100644 index 00000000..3ec4eaae --- /dev/null +++ b/crates/composefs-ctl/src/mountcomposefs.rs @@ -0,0 +1,272 @@ +//! mount.composefs - Mount helper for composefs images. +//! +//! This is a Rust reimplementation of the C mount.composefs tool, providing +//! a compatible command-line interface. When installed as `/usr/sbin/mount.composefs`, +//! the kernel dispatches to it for `mount -t composefs` commands. 
+
+use std::ffi::OsString;
+use std::os::fd::{AsFd, OwnedFd};
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use rustix::fs::{CWD, Mode, OFlags};
+
+use composefs::fsverity::{FsVerityHashValue, MeasureVerityError, Sha256HashValue, measure_verity};
+use composefs::mount::{MountOptions, VerityRequirement, composefs_fsmount, mount_at};
+
+/// Mount helper for composefs images.
+///
+/// Supported -o options: basedir=PATH[:PATH], digest=DIGEST, idmap=PATH,
+/// verity, tryverity, ro, rw, upperdir=PATH, workdir=PATH
+#[derive(Parser, Debug)]
+#[command(name = "mount.composefs")]
+struct MountArgs {
+    /// Filesystem type (must be "composefs")
+    #[arg(short = 't', value_name = "TYPE")]
+    fstype: Option<String>,
+
+    /// Mount options (comma-separated key[=value] pairs)
+    #[arg(short = 'o', value_name = "OPTIONS")]
+    options: Option<String>,
+
+    /// Path to the composefs image
+    image: String,
+
+    /// Mount point
+    mountpoint: String,
+}
+
+/// Removes backslash escapes from an option value: `\x` becomes `x` for any
+/// character `x`. A trailing lone backslash is dropped.
+fn unescape_option(s: &str) -> String {
+    let mut result = String::with_capacity(s.len());
+    let mut chars = s.chars();
+    while let Some(c) = chars.next() {
+        if c == '\\' {
+            if let Some(next) = chars.next() {
+                result.push(next);
+            }
+        } else {
+            result.push(c);
+        }
+    }
+    result
+}
+
+/// One `key` or `key=value` entry of a `-o` option string.
+struct ParsedOption {
+    /// Text before the first `=` (the whole entry when there is no `=`).
+    key: String,
+    /// Unescaped text after the first `=`, if any.
+    value: Option<String>,
+}
+
+/// Splits a `-o` option string into its entries.
+///
+/// Entries are separated by unescaped commas. Within an entry the first `=`
+/// separates key from value. A backslash protects the following byte from
+/// being treated as a separator; escapes are resolved in values only, keys
+/// are taken verbatim.
+fn parse_mount_options(options: &str) -> Vec<ParsedOption> {
+    let mut result = Vec::new();
+    let mut rest = options;
+
+    while !rest.is_empty() {
+        let mut equal_pos = None;
+        let mut end_pos = rest.len();
+        let bytes = rest.as_bytes();
+        let mut i = 0;
+
+        // Scan one entry. `=`, `\` and `,` are ASCII, so every index recorded
+        // here is a valid UTF-8 boundary for the slicing below.
+        while i < bytes.len() {
+            if bytes[i] == b'=' && equal_pos.is_none() {
+                equal_pos = Some(i);
+            } else if bytes[i] == b'\\' && i + 1 < bytes.len() {
+                // Skip the escaped byte so it cannot end the entry.
+                i += 1;
+            } else if bytes[i] == b',' {
+                end_pos = i;
+                break;
+            }
+            i += 1;
+        }
+
+        let entry = &rest[..end_pos];
+        rest = if end_pos < rest.len() {
+            &rest[end_pos + 1..]
+        } else {
+            ""
+        };
+
+        // `equal_pos` is only ever set while scanning this entry, so it is
+        // always inside `entry`.
+        let (key, value) = match equal_pos {
+            Some(eq) => (&entry[..eq], Some(unescape_option(&entry[eq + 1..]))),
+            None => (entry, None),
+        };
+
+        result.push(ParsedOption {
+            key: key.to_string(),
+            value,
+        });
+    }
+
+    result
+}
+
+/// Parses the `mount.composefs` command line in `args` (without argv[0]),
+/// validates the options and mounts the image at the requested mount point.
+///
+/// `digest=` implies required verity and is checked against the measured
+/// fs-verity digest of the image before mounting.
+fn run_mount(args: impl IntoIterator<Item = OsString>) -> Result<()> {
+    let cli =
+        MountArgs::try_parse_from(std::iter::once(OsString::from("mount.composefs")).chain(args))?;
+
+    if let Some(ref fstype) = cli.fstype
+        && fstype != "composefs"
+    {
+        bail!("Unsupported fs type '{fstype}'");
+    }
+
+    let mut opt_basedir: Option<String> = None;
+    let mut opt_digest: Option<String> = None;
+    let mut opt_upperdir: Option<String> = None;
+    let mut opt_workdir: Option<String> = None;
+    let mut opt_idmap: Option<OwnedFd> = None;
+    let mut opt_verity = false;
+    let mut opt_tryverity = false;
+    let mut opt_ro = false;
+
+    if let Some(ref opts_str) = cli.options {
+        for opt in parse_mount_options(opts_str) {
+            match opt.key.as_str() {
+                "basedir" => {
+                    opt_basedir = Some(opt.value.context("No value specified for basedir option")?);
+                }
+                "digest" => {
+                    opt_digest = Some(opt.value.context("No value specified for digest option")?);
+                }
+                "verity" => opt_verity = true,
+                "tryverity" => opt_tryverity = true,
+                "upperdir" => {
+                    opt_upperdir = Some(
+                        opt.value
+                            .context("No value specified for upperdir option")?,
+                    );
+                }
+                "workdir" => {
+                    opt_workdir = Some(opt.value.context("No value specified for workdir option")?);
+                }
+                "idmap" => {
+                    let idmap_path = opt.value.context("No value specified for idmap option")?;
+                    let idmap_fd = rustix::fs::open(
+                        idmap_path.as_str(),
+                        OFlags::RDONLY | OFlags::CLOEXEC | OFlags::NOCTTY,
+                        Mode::empty(),
+                    )
+                    .with_context(|| format!("Failed to open idmap {idmap_path}"))?;
+                    opt_idmap = Some(idmap_fd);
+                }
+                // When both are given, the last of `ro` / `rw` wins.
+                "rw" => opt_ro = false,
+                "ro" => opt_ro = true,
+                other => bail!("Unsupported option: {other}"),
+            }
+        }
+    }
+
+    let basedir_str = match opt_basedir {
+        Some(ref s) => s.as_str(),
+        None => {
+            bail!("No object dirs specified");
+        }
+    };
+
+    // basedir is a colon-separated list; empty components are ignored.
+    let mut basedir_fds: Vec<OwnedFd> = Vec::new();
+    for dir in basedir_str.split(':') {
+        if dir.is_empty() {
+            continue;
+        }
+        let fd = rustix::fs::open(
+            dir,
+            OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
+            Mode::empty(),
+        )
+        .with_context(|| format!("Failed to open basedir {dir}"))?;
+        basedir_fds.push(fd);
+    }
+    if basedir_fds.is_empty() {
+        bail!("No object dirs specified");
+    }
+
+    match (&opt_upperdir, &opt_workdir) {
+        (Some(_), None) | (None, Some(_)) => {
+            bail!("Both workdir and upperdir must be specified if used");
+        }
+        _ => {}
+    }
+
+    let verity = if opt_verity || opt_digest.is_some() {
+        VerityRequirement::Required
+    } else if opt_tryverity {
+        VerityRequirement::Try
+    } else {
+        VerityRequirement::Disabled
+    };
+
+    let image_fd = rustix::fs::open(
+        cli.image.as_str(),
+        OFlags::RDONLY | OFlags::CLOEXEC,
+        Mode::empty(),
+    )
+    .with_context(|| format!("Failed to open {}", cli.image))?;
+
+    if let Some(ref digest_hex) = opt_digest {
+        let expected = Sha256HashValue::from_hex(digest_hex).context("Invalid digest value")?;
+        match measure_verity::<Sha256HashValue>(&image_fd) {
+            Ok(measured) => {
+                if measured != expected {
+                    bail!(
+                        "Failed to mount composefs {}: Image has wrong fs-verity",
+                        cli.image
+                    );
+                }
+            }
+            Err(MeasureVerityError::VerityMissing) => {
+                bail!(
+                    "Failed to mount composefs {}: Image has no fs-verity",
+                    cli.image
+                );
+            }
+            Err(e) => {
+                bail!("Failed to mount composefs {}: {e}", cli.image);
+            }
+        }
+    }
+
+    let mut mount_opts = MountOptions::default();
+    if let (Some(upper), Some(work)) = (&opt_upperdir, &opt_workdir) {
+        let upper_fd = rustix::fs::open(
+            upper.as_str(),
+            OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
+            Mode::empty(),
+        )
+        .with_context(|| format!("Failed to open upperdir {upper}"))?;
+        let work_fd = rustix::fs::open(
+            work.as_str(),
+            OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
+            Mode::empty(),
+        )
+        .with_context(|| format!("Failed to open workdir {work}"))?;
+        mount_opts.set_overlay(upper_fd, work_fd);
+    }
+    if let Some(idmap_fd) = opt_idmap {
+        mount_opts.set_idmap(idmap_fd);
+    }
+    mount_opts.set_read_write(!opt_ro);
+
+    let borrowed: Vec<_> = basedir_fds.iter().map(|fd| fd.as_fd()).collect();
+    let fs_fd = composefs_fsmount(image_fd, "composefs", &borrowed, verity, &mount_opts)
+        .with_context(|| format!("Failed to mount composefs {}", cli.image))?;
+
+    mount_at(&fs_fd, CWD, cli.mountpoint.as_str())
+        .with_context(|| format!("Failed to mount at {}", cli.mountpoint))?;
+
+    Ok(())
+}
+
+/// Entry point when invoked as `mount.composefs` via argv[0] symlink.
+pub fn run() -> Result<()> {
+    let args: Vec<OsString> = std::env::args_os().skip(1).collect();
+    run_mount(args)
+}
+
+/// Entry point when invoked as `cfsctl mount.composefs ...` subcommand.
+pub fn run_from_args(args: Vec<OsString>) -> Result<()> {
+    run_mount(args)
+}
diff --git a/crates/composefs-ctl/tests/test-lib.sh b/crates/composefs-ctl/tests/test-lib.sh
new file mode 100755
index 00000000..880a9501
--- /dev/null
+++ b/crates/composefs-ctl/tests/test-lib.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Test helper functions, adapted from composefs test-lib.sh
+
+# Print the message to stderr and abort the test script.
+fatal() {
+    echo "$@" 1>&2; exit 1
+}
+
+# Dump the given file (listing plus '# '-prefixed content) to stderr, then fail.
+_fatal_print_file() {
+    file="$1"
+    shift
+    ls -al "$file" >&2
+    sed -e 's/^/# /' < "$file" >&2
+    fatal "$@"
+}
+
+assert_file_has_content () {
+    fpath=$1
+    shift
+    for re in "$@"; do
+        if !
grep -q -e "$re" "$fpath"; then + _fatal_print_file "$fpath" "File '$fpath' doesn't match regexp '$re'" + fi + done +} + +assert_streq () { + if test "$1" != "$2"; then + echo "assertion failed: $1 = $2" 1>&2 + return 1 + fi +} + +check_fsverity () { + fsverity --version >/dev/null 2>&1 || return 1 + tmpfile=$(mktemp --tmpdir lcfs-fsverity.XXXXXX) + echo foo > $tmpfile + fsverity enable $tmpfile >/dev/null 2>&1 || return 1 + return 0 +} + +[[ -v has_fsverity ]] || has_fsverity=$(if check_fsverity; then echo y; else echo n; fi) + +echo Test options: has_fsverity=$has_fsverity diff --git a/crates/composefs-ctl/tests/test-mount-composefs.sh b/crates/composefs-ctl/tests/test-mount-composefs.sh new file mode 100755 index 00000000..7d182136 --- /dev/null +++ b/crates/composefs-ctl/tests/test-mount-composefs.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Tests for mount.composefs, adapted from composefs tests/test-units.sh +# +# Requires: root (for mount operations), fsverity-utils +# Usage: test-mount-composefs.sh /path/to/cfsctl + +set -e + +CFSCTL=$(cd "$(dirname "$1")" && pwd)/$(basename "$1") +test -x "$CFSCTL" || { echo "Usage: $0 /path/to/cfsctl" >&2; exit 1; } + +# Its more likely that fsverity works in /var/tmp than in /tmp (which +# is typically tmpfs) so use that here. +export TMPDIR=${TMPDIR:-/var/tmp} + +workdir=$(mktemp --directory --tmpdir lcfs-test.XXXXXX) +trap 'rm -rf -- "$workdir"' EXIT + +. 
$(dirname $0)/test-lib.sh + +function makeimage () { + local dir=$1 + $CFSCTL mkcomposefs --digest-store=$dir/objects $dir/root $dir/test.cfs +} + +function test_mount_digest () { + local dir=$1 + + if [ $has_fsverity = y ]; then + echo foo > $dir/root/a-file + makeimage $dir + + $CFSCTL mount.composefs -o basedir=$dir/objects,digest=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $dir/test.cfs $dir/mnt 2> $dir/stderr && fatal "non-fsverity mount should not succeed" + assert_file_has_content $dir/stderr "Image has no fs-verity" + + fsverity enable $dir/test.cfs + + $CFSCTL mount.composefs -o basedir=$dir/objects,digest=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $dir/test.cfs $dir/mnt 2> $dir/stderr && fatal "wrong fsverity mount should not succeed" + assert_file_has_content $dir/stderr "Image has wrong fs-verity" + + local DIGEST=$(fsverity measure $dir/test.cfs | awk "{ print \$1 }" | sed s/sha256://) + + # We should either successfully mount, or start trying and fail for one of these reasons: + # * Permission denied, if not root + # * No such file or directory, if /dev/loop-control is missing + # * Operation not permitted, when running in a sandbox + # What should not happen is that it should fail for fs-verity reasons before trying to mount. + $CFSCTL mount.composefs -o basedir=$dir/objects,digest=$DIGEST $dir/test.cfs $dir/mnt 2> $dir/stderr || assert_file_has_content $dir/stderr "Permission denied\|No such file or directory\|Operation not permitted" + umount $dir/mnt 2> /dev/null || true + fi +} + +function test_mount_basic () { + local dir=$1 + + dd if=/dev/zero bs=1 count=1024 2>/dev/null > $dir/root/a-file + makeimage $dir + + # Try to mount; we may not have root or the right kernel, so accept + # permission/sandbox failures. 
+ $CFSCTL mount.composefs -o basedir=$dir/objects $dir/test.cfs $dir/mnt 2> $dir/stderr || { + assert_file_has_content $dir/stderr "Permission denied\|No such file or directory\|Operation not permitted" + return 0 + } + + # If we got here, mounting succeeded — verify the file exists + test -f $dir/mnt/a-file || fatal "a-file not found in mount" + + umount $dir/mnt +} + +TESTS="test_mount_basic test_mount_digest" +res=0 +for i in $TESTS; do + testdir=$(mktemp -d $workdir/$i.XXXXXX) + mkdir $testdir/root $testdir/objects $testdir/mnt + if $i $testdir ; then + echo "Test $i: OK" + else + res=1 + echo "Test $i: FAILED" + fi + + rm -rf $testdir +done + +exit $res diff --git a/crates/composefs-ioctls/src/lib.rs b/crates/composefs-ioctls/src/lib.rs index e6f42f44..cb220d36 100644 --- a/crates/composefs-ioctls/src/lib.rs +++ b/crates/composefs-ioctls/src/lib.rs @@ -25,6 +25,7 @@ #![deny(unsafe_code)] pub mod fsverity; +pub mod mount; #[cfg(feature = "loop-device")] pub mod loop_device; diff --git a/crates/composefs-ioctls/src/mount.rs b/crates/composefs-ioctls/src/mount.rs new file mode 100644 index 00000000..b92802cb --- /dev/null +++ b/crates/composefs-ioctls/src/mount.rs @@ -0,0 +1,55 @@ +//! Low-level mount syscall wrappers not yet exposed by rustix. +#![allow(unsafe_code)] + +use std::os::fd::{AsFd, AsRawFd}; + +const MOUNT_ATTR_IDMAP: u64 = 0x00100000; +const AT_EMPTY_PATH: u32 = 0x1000; + +#[cfg(not(any( + target_arch = "mips", + target_arch = "mips32r6", + target_arch = "mips64", + target_arch = "mips64r6" +)))] +const SYS_MOUNT_SETATTR: std::ffi::c_long = 442; +#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))] +const SYS_MOUNT_SETATTR: std::ffi::c_long = 4442; +#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))] +const SYS_MOUNT_SETATTR: std::ffi::c_long = 5442; + +#[repr(C)] +struct MountAttr { + attr_set: u64, + attr_clr: u64, + propagation: u64, + userns_fd: u64, +} + +unsafe extern "C" { + fn syscall(num: std::ffi::c_long, ...) 
-> std::ffi::c_long; +} + +/// Applies an ID mapping from a user namespace to a mount. +pub fn mount_setattr_idmap(mount_fd: impl AsFd, userns_fd: impl AsFd) -> std::io::Result<()> { + let attr = MountAttr { + attr_set: MOUNT_ATTR_IDMAP, + attr_clr: 0, + propagation: 0, + userns_fd: userns_fd.as_fd().as_raw_fd() as u64, + }; + let ret = unsafe { + syscall( + SYS_MOUNT_SETATTR, + mount_fd.as_fd().as_raw_fd(), + c"".as_ptr(), + AT_EMPTY_PATH, + &attr as *const MountAttr, + std::mem::size_of::(), + ) + }; + if ret < 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) +} diff --git a/crates/composefs-oci/src/delta.rs b/crates/composefs-oci/src/delta.rs index 361b9b09..da62d271 100644 --- a/crates/composefs-oci/src/delta.rs +++ b/crates/composefs-oci/src/delta.rs @@ -130,7 +130,7 @@ impl ComposeFsDataSource { self.current = Some(match file { RegularFile::Inline(data) => CurrentFile::Inline(Cursor::new(data.to_vec())), - RegularFile::External(id, _size) => { + RegularFile::External(id, _size) | RegularFile::ExternalNoVerity(id, _size) => { let fd = self .source .repo @@ -138,6 +138,12 @@ impl ComposeFsDataSource { .with_context(|| format!("Opening source object for {}", path.display()))?; CurrentFile::External(File::from(fd)) } + RegularFile::Sparse(_) => { + anyhow::bail!( + "Sparse file not supported as delta source: {}", + path.display() + ); + } }); Ok(()) } diff --git a/crates/composefs-setup-root/src/main.rs b/crates/composefs-setup-root/src/main.rs index 689aa605..774590e3 100644 --- a/crates/composefs-setup-root/src/main.rs +++ b/crates/composefs-setup-root/src/main.rs @@ -155,7 +155,7 @@ fn overlay_state(base: impl AsFd, state: impl AsFd, source: &str) -> Result<()> fsconfig_set_string(overlayfs.as_fd(), "source", source)?; overlayfs_set_fd(overlayfs.as_fd(), "workdir", work.as_fd())?; overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upper.as_fd())?; - overlayfs_set_lower_and_data_fds(&overlayfs, base.as_fd(), None::)?; + 
overlayfs_set_lower_and_data_fds(&overlayfs, base.as_fd(), &[])?; fsconfig_create(overlayfs.as_fd())?; let fs = fsmount( overlayfs.as_fd(), diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index 8b3d6253..8d18e43d 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -191,7 +191,9 @@ pub fn write_leaf( data, None, ), - LeafContent::Regular(RegularFile::External(id, size)) => write_entry( + LeafContent::Regular( + RegularFile::External(id, size) | RegularFile::ExternalNoVerity(id, size), + ) => write_entry( writer, path, stat, @@ -203,6 +205,18 @@ pub fn write_leaf( &[], Some(&id.to_hex()), ), + LeafContent::Regular(RegularFile::Sparse(size)) => write_entry( + writer, + path, + stat, + FileType::RegularFile, + *size, + nlink, + 0, + "", + &[], + None, + ), LeafContent::BlockDevice(rdev) => write_entry( writer, path, @@ -450,28 +464,38 @@ pub fn add_entry_to_filesystem( .ok_or_else(|| anyhow::anyhow!("Hardlink target not found: {target:?}"))?; Inode::leaf(existing_id) } - Item::RegularInline { ref content, .. } => { + Item::RegularInline { + ref content, size, .. + } => { let stat = entry_to_stat(&entry)?; - let data: Box<[u8]> = match content { - std::borrow::Cow::Borrowed(d) => Box::from(*d), - std::borrow::Cow::Owned(d) => d.clone().into_boxed_slice(), + let leaf_content = if content.is_empty() && size > 0 { + LeafContent::Regular(RegularFile::Sparse(size)) + } else { + let data: Box<[u8]> = match content { + std::borrow::Cow::Borrowed(d) => Box::from(*d), + std::borrow::Cow::Owned(d) => d.clone().into_boxed_slice(), + }; + LeafContent::Regular(RegularFile::Inline(data)) }; - let content = LeafContent::Regular(RegularFile::Inline(data)); - let id = push_leaf(fs, stat, content); + let id = push_leaf(fs, stat, leaf_content); Inode::leaf(id) } Item::Regular { size, ref fsverity_digest, + ref path, .. 
} => { let stat = entry_to_stat(&entry)?; - let digest = fsverity_digest - .as_ref() - .ok_or_else(|| anyhow::anyhow!("External file missing fsverity digest"))?; - let object_id = ObjectID::from_hex(digest)?; - let content = LeafContent::Regular(RegularFile::External(object_id, size)); - let id = push_leaf(fs, stat, content); + let leaf_content = if let Some(digest) = fsverity_digest.as_ref() { + let object_id = ObjectID::from_hex(digest)?; + LeafContent::Regular(RegularFile::External(object_id, size)) + } else { + let object_id = ObjectID::from_object_pathname(path.as_os_str().as_bytes()) + .map_err(|e| anyhow::anyhow!("invalid object pathname: {e}"))?; + LeafContent::Regular(RegularFile::ExternalNoVerity(object_id, size)) + }; + let id = push_leaf(fs, stat, leaf_content); Inode::leaf(id) } Item::Device { rdev, nlink } => { diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index f01a28a3..075efc8c 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -83,6 +83,8 @@ pub enum Item<'p> { RegularInline { /// Number of links nlink: u32, + /// Size from dump entry (may differ from content length for sparse files) + size: u64, /// Inline content content: Cow<'p, [u8]>, }, @@ -449,7 +451,11 @@ impl<'p> Entry<'p> { if fsverity_digest.is_some() { anyhow::bail!("Inline file cannot have fsverity digest"); } - Item::RegularInline { nlink, content } + Item::RegularInline { + nlink, + size, + content, + } } } FileType::Symlink => { @@ -525,7 +531,7 @@ impl Item<'_> { pub(crate) fn size(&self) -> u64 { match self { Item::Regular { size, .. } => *size, - Item::RegularInline { content, .. } => content.len() as u64, + Item::RegularInline { size, .. } => *size, // Directories always report 0; the spec says size is ignored. Item::Directory { .. 
} => 0, _ => 0, diff --git a/crates/composefs/src/erofs/format.rs b/crates/composefs/src/erofs/format.rs index 565bf41a..4616c761 100644 --- a/crates/composefs/src/erofs/format.rs +++ b/crates/composefs/src/erofs/format.rs @@ -263,7 +263,8 @@ impl std::ops::BitOr for FileType { type Output = ModeField; fn bitor(self, permissions: u32) -> ModeField { - ModeField(self | (permissions as u16)) + // Mask out file type bits so we only keep permission bits from the input + ModeField(self | (permissions as u16 & !S_IFMT)) } } diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index 5d9f405f..ddd63e91 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1072,9 +1072,8 @@ impl<'img> Image<'img> { /// C composefs v1.0.8 converts char-device-rdev-0 entries to regular files /// on write (whiteout escaping). The reader must reverse this. fn is_escaped_v1_whiteout(img: &Image, inode: &InodeType) -> anyhow::Result { - // Only relevant for regular files - let mode = inode.mode().0.get(); - if mode & S_IFMT != S_IFREG { + let file_type = inode.mode().0.get() & S_IFMT; + if file_type != S_IFREG { return Ok(false); } @@ -1327,17 +1326,22 @@ pub struct ObjectCollector { impl ObjectCollector { fn visit_xattr(&mut self, attr: &XAttr) -> Result<(), ErofsReaderError> { - // This is the index of "trusted". See XATTR_PREFIXES in format.rs. if attr.header.name_index != 4 { return Ok(()); } - if attr.suffix()? != b"overlay.metacopy" { - return Ok(()); - } - if let Ok(value) = OverlayMetacopy::read_from_bytes(attr.value()?) - && value.valid() - { - self.objects.insert(value.digest); + let suffix = attr.suffix()?; + if suffix == b"overlay.metacopy" { + if let Ok(value) = OverlayMetacopy::read_from_bytes(attr.value()?) 
+ && value.valid() + { + self.objects.insert(value.digest); + } + } else if suffix == b"overlay.redirect" { + let value = attr.value()?; + let path = value.strip_prefix(b"/").unwrap_or(value); + if let Ok(id) = ObjectID::from_object_pathname(path) { + self.objects.insert(id); + } } Ok(()) } @@ -1586,6 +1590,50 @@ fn extract_metacopy_digest( Ok(None) } +/// Try to extract the object ID from a redirect xattr (`trusted.overlay.redirect`). +/// +/// The redirect value is a path like `/55/90e94b...` from which we parse +/// the object ID. Returns `None` if no redirect xattr is present. +fn extract_redirect_object_id( + img: &Image, + inode: &InodeType, +) -> anyhow::Result> { + let Some(xattrs_section) = inode.xattrs()? else { + return Ok(None); + }; + + for id in xattrs_section.shared()? { + let xattr = img.shared_xattr(id.get())?; + if let Some(obj) = check_redirect_xattr(xattr)? { + return Ok(Some(obj)); + } + } + for xattr in xattrs_section.local()? { + let xattr = xattr?; + if let Some(obj) = check_redirect_xattr(xattr)? { + return Ok(Some(obj)); + } + } + Ok(None) +} + +fn check_redirect_xattr( + xattr: &XAttr, +) -> anyhow::Result> { + if xattr.header.name_index != 4 { + return Ok(None); + } + if xattr.suffix()? != b"overlay.redirect" { + return Ok(None); + } + let value = xattr.value()?; + let path = value.strip_prefix(b"/").unwrap_or(value); + match ObjectID::from_object_pathname(path) { + Ok(id) => Ok(Some(id)), + Err(_) => Ok(None), + } +} + /// Check if a single xattr is a valid overlay.metacopy and return the digest. /// /// When `strict` is true, a `trusted.overlay.metacopy` xattr that cannot be @@ -1604,6 +1652,9 @@ fn check_metacopy_xattr( } // At this point we know the xattr is named trusted.overlay.metacopy. 
let value_bytes = xattr.value()?; + if value_bytes.is_empty() { + return Ok(None); + } let value = match OverlayMetacopy::::read_from_bytes(value_bytes) { Ok(v) => v, Err(_) if strict => { @@ -1823,54 +1874,72 @@ fn populate_directory( let mode = child_inode.mode().0.get(); let file_type = mode & S_IFMT; - let content = match file_type { - S_IFREG => { - // V1 images escape whiteouts (char dev rdev=0) to regular files. - // The is_escaped_whiteout flag was computed above (before the - // root-dir skip check), so reuse it here. - if is_escaped_whiteout { - tree::LeafContent::CharacterDevice(0) - } else if let Some(digest) = - extract_metacopy_digest::(img, &child_inode)? - { - tree::LeafContent::Regular(tree::RegularFile::External( - digest, - child_inode.size(), - )) - } else { - if img.composefs_restricted { - let size = child_inode.size(); - if size > MAX_INLINE_CONTENT as u64 { - anyhow::bail!( - "inline regular file {:?} has size {} \ + // V1 images escape whiteouts (char dev rdev=0) to regular files. + // The is_escaped_whiteout flag was computed above (before the + // root-dir skip check), so check it before the file-type match. + let content = if is_escaped_whiteout { + tree::LeafContent::CharacterDevice(0) + } else { + match file_type { + S_IFREG => { + if let Some(digest) = + extract_metacopy_digest::(img, &child_inode)? + { + tree::LeafContent::Regular(tree::RegularFile::External( + digest, + child_inode.size(), + )) + } else if let Some(id) = + extract_redirect_object_id::(img, &child_inode)? + { + tree::LeafContent::Regular(tree::RegularFile::ExternalNoVerity( + id, + child_inode.size(), + )) + } else if child_inode.data_layout()? 
== DataLayout::ChunkBased { + tree::LeafContent::Regular(tree::RegularFile::Sparse( + child_inode.size(), + )) + } else { + if img.composefs_restricted + && img.header.composefs_version == COMPOSEFS_VERSION + { + let size = child_inode.size(); + if size > MAX_INLINE_CONTENT as u64 { + anyhow::bail!( + "inline regular file {:?} has size {} \ (max {MAX_INLINE_CONTENT})", - name, - size, - ); + name, + size, + ); + } } + let data = extract_all_file_data(img, &child_inode)?; + tree::LeafContent::Regular(tree::RegularFile::Inline(data.into())) } - let data = extract_all_file_data(img, &child_inode)?; - tree::LeafContent::Regular(tree::RegularFile::Inline(data.into())) } - } - S_IFLNK => { - let target_data = child_inode.inline().unwrap_or(&[]); - if target_data.len() > crate::SYMLINK_MAX { - anyhow::bail!( - "symlink target for {:?} is {} bytes (max {})", - name, - target_data.len(), - crate::SYMLINK_MAX, - ); + S_IFLNK => { + let target_data = extract_all_file_data(img, &child_inode)?; + if img.composefs_restricted + && img.header.composefs_version == COMPOSEFS_VERSION + && target_data.len() > crate::SYMLINK_MAX + { + anyhow::bail!( + "symlink target for {:?} is {} bytes (max {})", + name, + target_data.len(), + crate::SYMLINK_MAX, + ); + } + let target = OsStr::from_bytes(&target_data); + tree::LeafContent::Symlink(Box::from(target)) } - let target = OsStr::from_bytes(target_data); - tree::LeafContent::Symlink(Box::from(target)) + S_IFBLK => tree::LeafContent::BlockDevice(child_inode.u() as u64), + S_IFCHR => tree::LeafContent::CharacterDevice(child_inode.u() as u64), + S_IFIFO => tree::LeafContent::Fifo, + S_IFSOCK => tree::LeafContent::Socket, + _ => anyhow::bail!("unknown file type {:#o} for {:?}", file_type, name), } - S_IFBLK => tree::LeafContent::BlockDevice(child_inode.u() as u64), - S_IFCHR => tree::LeafContent::CharacterDevice(child_inode.u() as u64), - S_IFIFO => tree::LeafContent::Fifo, - S_IFSOCK => tree::LeafContent::Socket, - _ => anyhow::bail!("unknown 
file type {:#o} for {:?}", file_type, name), }; // Hardlinked whiteouts are semantically invalid: a whiteout represents the @@ -3405,9 +3474,20 @@ mod tests { let dumpfile = String::from_utf8(dumpfile_bytes).unwrap(); let c_image = c_mkcomposefs_from_dumpfile(&dumpfile); + // C mkcomposefs defaults to --max-version=1, auto-bumping + // from V0 to V1 when whiteouts (chardev rdev=0) are present. + let has_whiteout = fs_rs + .leaves + .iter() + .any(|leaf| matches!(leaf.content, tree::LeafContent::CharacterDevice(0))); + let version = if has_whiteout { + FormatVersion::V1 + } else { + FormatVersion::V0 + }; let rust_image = mkfs_erofs_versioned( &mut ValidatedFileSystem::new(fs_rs).unwrap(), - FormatVersion::V0, + version, ); if c_image != rust_image.as_ref() { diff --git a/crates/composefs/src/erofs/writer.rs b/crates/composefs/src/erofs/writer.rs index 406a87cf..4f4c7f68 100644 --- a/crates/composefs/src/erofs/writer.rs +++ b/crates/composefs/src/erofs/writer.rs @@ -420,6 +420,13 @@ struct Directory<'a> { struct Leaf<'a, ObjectID: FsVerityHashValue> { content: &'a tree::LeafContent, nlink: usize, + /// Epoch1 only: number of full data blocks for inline content. + /// Matches C mkcomposefs which splits large inline files into data blocks + /// plus an optional inline tail. Zero for V2 or small files. + n_data_blocks: u32, + /// Epoch1 only: size of the inline tail. When the tail exceeds half a block, + /// it's promoted to a full data block (n_data_blocks++) and tail becomes 0. + inline_tail_size: usize, } #[derive(Debug)] @@ -644,7 +651,7 @@ impl<'a> Directory<'a> { /// 2. Clamp to at least BLOCK_BITS (12) /// 3. Clamp to at most BLOCK_BITS + 31 (max representable) /// 4. 
Return chunkbits - BLOCK_BITS -fn compute_chunk_format(file_size: u64) -> u32 { +fn compute_chunk_bitsize(file_size: u64) -> u32 { const BLOCK_BITS: u32 = format::BLOCK_BITS as u32; const CHUNK_FORMAT_BLKBITS_MASK: u32 = 0x001F; // 31 @@ -667,27 +674,54 @@ fn compute_chunk_format(file_size: u64) -> u32 { chunkbits = CHUNK_FORMAT_BLKBITS_MASK + BLOCK_BITS; } - chunkbits - BLOCK_BITS + chunkbits +} + +fn compute_chunk_format(file_size: u64) -> u32 { + compute_chunk_bitsize(file_size) - format::BLOCK_BITS as u32 +} + +fn compute_chunk_count(file_size: u64) -> u32 { + let chunkbits = compute_chunk_bitsize(file_size); + let chunksize = 1u64 << chunkbits; + file_size.div_ceil(chunksize) as u32 } impl Leaf<'_, ObjectID> { - fn inode_meta(&self, version: format::FormatVersion) -> InodeMeta { + fn inode_meta(&self, version: format::FormatVersion, block_offset: usize) -> InodeMeta { let (layout, i_u, size) = match &self.content { tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => { if data.is_empty() { (format::DataLayout::FlatPlain, 0, data.len() as u64) + } else if self.n_data_blocks > 0 { + let blkaddr = (block_offset / format::BLOCK_SIZE as usize) as u32; + if self.inline_tail_size > 0 { + (format::DataLayout::FlatInline, blkaddr, data.len() as u64) + } else { + (format::DataLayout::FlatPlain, blkaddr, data.len() as u64) + } } else { (format::DataLayout::FlatInline, 0, data.len() as u64) } } - tree::LeafContent::Regular(tree::RegularFile::External(.., size)) => { - // V1: compute chunk format from file size - // V2: hardcode 31 (origin/main behavior) + tree::LeafContent::Regular( + tree::RegularFile::External(.., size) + | tree::RegularFile::ExternalNoVerity(.., size) + | tree::RegularFile::Sparse(size), + ) => { let chunk_format = match version.epoch() { + // Epoch1: compute chunk format from file size FormatEpoch::Epoch1 => compute_chunk_format(*size), + // Epoch2: hardcode 31 (single-chunk layout) FormatEpoch::Epoch2 => 31, }; - 
(format::DataLayout::ChunkBased, chunk_format, *size) + let i_u = if self.n_data_blocks > 0 { + let blkaddr = (block_offset / format::BLOCK_SIZE as usize) as u32; + (blkaddr & 0xFFFF0000) | chunk_format + } else { + chunk_format + }; + (format::DataLayout::ChunkBased, i_u, *size) } tree::LeafContent::CharacterDevice(rdev) | tree::LeafContent::BlockDevice(rdev) => { let rdev32: u32 = (*rdev) @@ -699,13 +733,12 @@ impl Leaf<'_, ObjectID> { (format::DataLayout::FlatPlain, 0, 0) } tree::LeafContent::Symlink(target) => { - assert!( - target.len() <= crate::SYMLINK_MAX, - "symlink target is {} bytes (max {})", - target.len(), - crate::SYMLINK_MAX, - ); - (format::DataLayout::FlatInline, 0, target.len() as u64) + if self.n_data_blocks > 0 { + let blkaddr = (block_offset / format::BLOCK_SIZE as usize) as u32; + (format::DataLayout::FlatPlain, blkaddr, target.len() as u64) + } else { + (format::DataLayout::FlatInline, 0, target.len() as u64) + } } }; InodeMeta { @@ -717,16 +750,72 @@ impl Leaf<'_, ObjectID> { } fn write_inline(&self, output: &mut impl Output) { - output.write(match self.content { - tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => data, - tree::LeafContent::Regular(tree::RegularFile::External(..)) => b"\xff\xff\xff\xff", // null chunk - tree::LeafContent::Symlink(target) => target.as_bytes(), - _ => &[], - }); + match self.content { + tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => { + let tail_start = data.len() - self.inline_tail_size; + output.write(&data[tail_start..]); + } + tree::LeafContent::Regular( + tree::RegularFile::External(..) + | tree::RegularFile::ExternalNoVerity(..) 
+ | tree::RegularFile::Sparse(..), + ) => { + let n_chunks = self.inline_tail_size / 4; + for _ in 0..n_chunks { + output.write(b"\xff\xff\xff\xff"); + } + } + tree::LeafContent::Symlink(target) if self.n_data_blocks == 0 => { + output.write(target.as_bytes()); + } + _ => {} + } + } + + fn write_data_blocks(&self, output: &mut impl Output) { + if self.n_data_blocks > 0 { + let block_size = format::BLOCK_SIZE as usize; + match self.content { + tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => { + for i in 0..self.n_data_blocks as usize { + let start = i * block_size; + let end = (start + block_size).min(data.len()); + if start < data.len() { + output.write(&data[start..end]); + } + output.pad(block_size); + } + } + tree::LeafContent::Symlink(target) => { + let data = target.as_bytes(); + let len = data.len().min(block_size); + output.write(&data[..len]); + output.pad(block_size); + } + tree::LeafContent::Regular( + tree::RegularFile::External(..) + | tree::RegularFile::ExternalNoVerity(..) + | tree::RegularFile::Sparse(..), + ) => { + const LCFS_MAX_NONINLINE_CHUNKS: usize = 1024; + for _ in 0..LCFS_MAX_NONINLINE_CHUNKS { + output.write(b"\xff\xff\xff\xff"); + } + } + _ => {} + } + } } } impl Inode<'_, ObjectID> { + fn chunk_inline_tail_size(&self) -> usize { + match &self.content { + InodeContent::Leaf(leaf) => leaf.inline_tail_size, + _ => 0, + } + } + fn file_type(&self) -> format::FileType { // V1 whiteout escaping: char device (rdev=0) entries are written as regular files // to match C mkcomposefs v1.0.8 behavior. @@ -746,6 +835,10 @@ impl Inode<'_, ObjectID> { } } + fn inode_mode(&self) -> format::ModeField { + self.file_type() | self.stat.st_mode + } + /// Check if this inode can use compact format (32 bytes instead of 64). 
/// /// Compact format is used when: @@ -879,7 +972,7 @@ impl Inode<'_, ObjectID> { let min_mtime = ctx.min_mtime; let meta = match &self.content { InodeContent::Directory(dir) => dir.inode_meta(output.get_block_start(idx)), - InodeContent::Leaf(leaf) => leaf.inode_meta(version), + InodeContent::Leaf(leaf) => leaf.inode_meta(version, output.get_block_start(idx)), }; let InodeMeta { layout, @@ -902,6 +995,59 @@ impl Inode<'_, ObjectID> { // We need to make sure the inline part doesn't overlap a block boundary output.pad(INODE_SLOT_SIZE); + + // Epoch1 promoted symlinks: target was moved to a data block but we + // still need to pad the inode start to a block boundary, matching C + // compute_erofs_inode_padding_for_tail which uses the original + // (pre-promotion) total_size for the block-crossing check. + let is_promoted_symlink = version.epoch() == FormatEpoch::Epoch1 + && matches!(self.file_type(), format::FileType::Symlink) + && matches!(layout, format::DataLayout::FlatPlain); + if is_promoted_symlink { + let block_size = u64::from(format::BLOCK_SIZE); + let current_pos: u64 = output.len().try_into().unwrap(); + let original_total_size = (inode_header_size + xattr_size) as u64 + size; + let pos_block = current_pos / block_size; + let end_block = (current_pos + original_total_size - 1) / block_size; + if pos_block != end_block + && let Some(pad_size) = bytes_to_block_boundary(current_pos) + { + output.write_zeros(pad_size as usize); + } + } + + // Promoted ChunkBased: chunk indices moved to data block, pad inode + // start to block boundary (matching C compute_erofs_inode_padding_for_tail). 
+ let is_promoted_chunk_based = version.epoch() == FormatEpoch::Epoch1 + && matches!(layout, format::DataLayout::ChunkBased) + && self.chunk_inline_tail_size() == 0 + && matches!(&self.content, InodeContent::Leaf(leaf) if leaf.n_data_blocks > 0); + if is_promoted_chunk_based { + let current_pos: u64 = output.len().try_into().unwrap(); + if let Some(pad_size) = bytes_to_block_boundary(current_pos) { + output.write_zeros(pad_size as usize); + } + } + + // ChunkBased inodes in Epoch1 have inline chunk index data that + // needs the same non-symlink tail padding as FlatInline data. + if version.epoch() == FormatEpoch::Epoch1 + && matches!(layout, format::DataLayout::ChunkBased) + && self.chunk_inline_tail_size() > 0 + { + let current_pos: u64 = output.len().try_into().unwrap(); + let non_tail_size = (inode_header_size + xattr_size) as u64; + let tail_size = self.chunk_inline_tail_size() as u64; + let inline_start = current_pos + non_tail_size; + if let Some(block_remainder) = bytes_to_block_boundary(inline_start) + && block_remainder < tail_size + { + let pad_size = (block_remainder.div_ceil(INODE_SLOT_SIZE as u64) + * INODE_SLOT_SIZE as u64) as usize; + output.write_zeros(pad_size); + } + } + if matches!(layout, format::DataLayout::FlatInline) { let inode_and_xattr_size: u64 = (inode_header_size + xattr_size).try_into().unwrap(); @@ -937,7 +1083,7 @@ impl Inode<'_, ObjectID> { output.write_struct(format::CompactInodeHeader { format, xattr_icount: xattr_icount.into(), - mode: self.file_type() | self.stat.st_mode, + mode: self.inode_mode(), nlink: (nlink as u16).into(), size: (size as u32).into(), reserved: 0.into(), @@ -966,7 +1112,7 @@ impl Inode<'_, ObjectID> { output.write_struct(format::ExtendedInodeHeader { format, xattr_icount: xattr_icount.into(), - mode: self.file_type() | self.stat.st_mode, + mode: self.inode_mode(), size: size.into(), u: u.into(), ino: ino.into(), @@ -990,8 +1136,9 @@ impl Inode<'_, ObjectID> { } fn write_blocks(&self, output: &mut impl 
Output) { - if let InodeContent::Directory(dir) = &self.content { - dir.write_blocks(output); + match &self.content { + InodeContent::Directory(dir) => dir.write_blocks(output), + InodeContent::Leaf(leaf) => leaf.write_data_blocks(output), } } } @@ -1018,18 +1165,36 @@ impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> { .. }) = content { + let metacopy = OverlayMetacopy::new(id); xattrs.add( format::XATTR_OVERLAY_METACOPY, - OverlayMetacopy::new(id).as_bytes(), + metacopy.as_bytes(), self.version, ); - let redirect = format!("/{}", id.to_object_pathname()); xattrs.add( format::XATTR_OVERLAY_REDIRECT, redirect.as_bytes(), self.version, ); + } else if let InodeContent::Leaf(Leaf { + content: tree::LeafContent::Regular(tree::RegularFile::ExternalNoVerity(id, ..)), + .. + }) = content + { + xattrs.add(format::XATTR_OVERLAY_METACOPY, b"", self.version); + let redirect = format!("/{}", id.to_object_pathname()); + xattrs.add( + format::XATTR_OVERLAY_REDIRECT, + redirect.as_bytes(), + self.version, + ); + } else if let InodeContent::Leaf(Leaf { + content: tree::LeafContent::Regular(tree::RegularFile::Sparse(..)), + .. + }) = content + { + xattrs.add(format::XATTR_OVERLAY_METACOPY, b"", self.version); } // Add the normal xattrs. They're already listed in sorted order. 
@@ -1074,11 +1239,54 @@ impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> { !(matches!(leaf.content, tree::LeafContent::CharacterDevice(0)) && nlink > 1), "ValidatedFileSystem guarantees whiteout nlink == 1" ); + let (n_data_blocks, inline_tail_size) = if self.version.epoch() == FormatEpoch::Epoch1 { + match &leaf.content { + tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => { + if data.is_empty() { + (0, 0) + } else { + let block_size = format::BLOCK_SIZE as usize; + let mut n_blocks = data.len() / block_size; + let mut tail = data.len() % block_size; + if tail > block_size / 2 { + n_blocks += 1; + tail = 0; + } + (n_blocks as u32, tail) + } + } + tree::LeafContent::Symlink(target) => { + // Initial: no data blocks, tail = target length. + // May be promoted to data block later by fixup_symlink_data_blocks + // when inode_header + xattr_size + target_len >= BLOCK_SIZE. + (0, target.len()) + } + tree::LeafContent::Regular( + tree::RegularFile::External(.., size) + | tree::RegularFile::ExternalNoVerity(.., size) + | tree::RegularFile::Sparse(size), + ) if *size > 0 => { + let chunk_count = compute_chunk_count(*size); + (0, chunk_count as usize * 4) + } + _ => (0, 0), + } + } else { + match &leaf.content { + tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => (0, data.len()), + tree::LeafContent::Regular(tree::RegularFile::External(..)) => { + (0, 4) // single null chunk index + } + _ => (0, 0), + } + }; let inode = self.push_inode( &leaf.stat, InodeContent::Leaf(Leaf { content: &leaf.content, nlink, + n_data_blocks, + inline_tail_size, }), ); @@ -1348,10 +1556,9 @@ impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> { } } - // V1: if this directory had whiteout children, add parent xattrs. - // C adds these once per directory, on first whiteout child found. 
- // Matches OVERLAY_XATTR_ESCAPED_WHITEOUTS, OVERLAY_XATTR_USERXATTR_WHITEOUTS, - // OVERLAY_XATTR_ESCAPED_OPAQUE (=x), OVERLAY_XATTR_USERXATTR_OPAQUE (=x). + // Epoch1: if this directory had whiteout children, add parent xattrs. + // C adds WHITEOUTS + USERXATTR_WHITEOUTS for all versions, and + // OPAQUE + USERXATTR_OPAQUE only for version >= 1. if self.version.epoch() == FormatEpoch::Epoch1 && dir_has_whiteout { self.inodes[me] .xattrs @@ -1359,12 +1566,14 @@ impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> { self.inodes[me] .xattrs .add(format::XATTR_USERXATTR_WHITEOUTS, b"", self.version); - self.inodes[me] - .xattrs - .add(format::XATTR_OVERLAY_OPAQUE, b"x", self.version); - self.inodes[me] - .xattrs - .add(format::XATTR_USERXATTR_OPAQUE, b"x", self.version); + if self.version == format::FormatVersion::V1 { + self.inodes[me] + .xattrs + .add(format::XATTR_OVERLAY_OPAQUE, b"x", self.version); + self.inodes[me] + .xattrs + .add(format::XATTR_USERXATTR_OPAQUE, b"x", self.version); + } } entries.sort_unstable_by_key(|e| e.name); @@ -1806,6 +2015,69 @@ type PreparedInodes<'a, ObjectID> = (Vec>, Vec, (u64, /// Shared setup for all `mkfs_erofs_*` entry points. /// +/// Epoch1: promote symlink inodes to data-block layout when the inode header + +/// xattrs + target would fill a full block. Must run after share_xattrs (so +/// xattr sizes are final) and after calculate_min_mtime (so compact/extended +/// is deterministic). 
+fn fixup_epoch1_data_blocks( + inodes: &mut [Inode], + version: format::FormatVersion, + min_mtime: (u64, u32), +) { + for inode in inodes.iter_mut() { + let (tail_size, nlink, is_symlink) = match &inode.content { + InodeContent::Leaf(leaf) if leaf.inline_tail_size > 0 => { + let is_sym = matches!(leaf.content, tree::LeafContent::Symlink(..)); + (leaf.inline_tail_size, leaf.nlink, is_sym) + } + _ => continue, + }; + + if is_symlink { + let xattr_size = inode.xattrs.byte_size(version); + let use_compact = inode.fits_in_compact(min_mtime, tail_size as u64, nlink); + let inode_header_size = if use_compact { + size_of::() + } else { + size_of::() + }; + let total_size = inode_header_size + xattr_size + tail_size; + if total_size >= format::BLOCK_SIZE as usize { + let leaf = match &mut inode.content { + InodeContent::Leaf(leaf) => leaf, + _ => unreachable!(), + }; + leaf.n_data_blocks += 1; + leaf.inline_tail_size = 0; + } + } else { + let is_chunk_based = match &inode.content { + InodeContent::Leaf(leaf) => matches!( + leaf.content, + tree::LeafContent::Regular( + tree::RegularFile::External(..) + | tree::RegularFile::ExternalNoVerity(..) + | tree::RegularFile::Sparse(..) + ) + ), + _ => false, + }; + if is_chunk_based { + let xattr_size = inode.xattrs.byte_size(version); + let overshoot = xattr_size % INODE_SLOT_SIZE; + if tail_size + overshoot > format::BLOCK_SIZE as usize { + let leaf = match &mut inode.content { + InodeContent::Leaf(leaf) => leaf, + _ => unreachable!(), + }; + leaf.n_data_blocks = 1; + leaf.inline_tail_size = 0; + } + } + } + } +} + /// Collects inodes from the filesystem, injects the Epoch1 opaque xattr on the /// root directory, computes `header_flags` and `composefs_version`, promotes /// repeated xattrs to the shared table, and calculates `min_mtime`. @@ -1848,15 +2120,9 @@ fn prepare_erofs_inodes<'a, ObjectID: FsVerityHashValue>( 0 }; - // V0: auto-bump composefs_version to 1 when user whiteouts present. 
- // V1: always write composefs_version=1 (the version enum encodes this directly). - let cfs_ver = match version { - format::FormatVersion::V0 => { - let has_user_whiteout = inodes.iter().any(|inode| inode.escaped_whiteout); - if has_user_whiteout { 1u32 } else { 0u32 } - } - _ => version.composefs_version().get(), - }; + // C library writes composefs_version directly from the version option. + // V0 always writes 0, V1 always writes 1. + let cfs_ver = version.composefs_version().get(); (flags, cfs_ver) } @@ -1866,6 +2132,10 @@ fn prepare_erofs_inodes<'a, ObjectID: FsVerityHashValue>( let xattrs = share_xattrs(&mut inodes, version); let min_mtime = calculate_min_mtime(&inodes); + if version.epoch() == FormatEpoch::Epoch1 { + fixup_epoch1_data_blocks(&mut inodes, version, min_mtime); + } + (inodes, xattrs, min_mtime, header_flags, composefs_version) } diff --git a/crates/composefs/src/fs.rs b/crates/composefs/src/fs.rs index a6fb7b96..fe61654b 100644 --- a/crates/composefs/src/fs.rs +++ b/crates/composefs/src/fs.rs @@ -306,7 +306,9 @@ fn write_leaf( LeafContent::Regular(RegularFile::Inline(data)) => { set_file_contents(dirfd, name, &leaf.stat, data)? } - LeafContent::Regular(RegularFile::External(id, size)) => { + LeafContent::Regular( + RegularFile::External(id, size) | RegularFile::ExternalNoVerity(id, size), + ) => { let object = repo.open_object(id)?; // TODO: make this better. At least needs to be EINTR-safe. Could even do reflink in some cases. // Regardless we shouldn't read the whole file into memory. @@ -315,6 +317,9 @@ fn write_leaf( let (data, _) = read(object, &mut buffer)?; set_file_contents(dirfd, name, &leaf.stat, data)?; } + LeafContent::Regular(RegularFile::Sparse(..)) => { + set_file_contents(dirfd, name, &leaf.stat, &[])?; + } LeafContent::BlockDevice(rdev) => mknodat(dirfd, name, FileType::BlockDevice, mode, *rdev)?, LeafContent::CharacterDevice(rdev) => { mknodat(dirfd, name, FileType::CharacterDevice, mode, *rdev)? 
@@ -707,7 +712,7 @@ pub fn read_file( ) -> Result> { match file { RegularFile::Inline(data) => Ok(data.clone()), - RegularFile::External(id, size) => { + RegularFile::External(id, size) | RegularFile::ExternalNoVerity(id, size) => { let capacity: usize = (*size).try_into().context("file too large for memory")?; let mut data = Vec::with_capacity(capacity); std::fs::File::from(repo.open_object(id)?).read_to_end(&mut data)?; @@ -717,6 +722,7 @@ pub fn read_file( ); Ok(data.into_boxed_slice()) } + RegularFile::Sparse(..) => Ok(Box::new([])), } } diff --git a/crates/composefs/src/lib.rs b/crates/composefs/src/lib.rs index 1bc5752b..806f3c8f 100644 --- a/crates/composefs/src/lib.rs +++ b/crates/composefs/src/lib.rs @@ -46,6 +46,8 @@ pub const INLINE_CONTENT_MAX_V0: usize = 64; /// Maximum inline content size accepted when parsing untrusted input (dumpfiles, /// EROFS images in composefs-restricted mode). /// +/// Only enforced for v2 images; the C code does not check this limit. +/// /// This is intentionally higher than [`INLINE_CONTENT_MAX_V0`] to allow for future /// increases to the inline threshold (see /// ). @@ -53,6 +55,8 @@ pub const MAX_INLINE_CONTENT: usize = 512; /// Maximum symlink target length in bytes. /// +/// Only enforced for v2 images; the C code does not check this limit. +/// /// XFS limits symlink targets to 1024 bytes (`XFS_SYMLINK_MAXLEN`). Since /// generic Linux containers are commonly backed by XFS, we enforce that /// limit rather than the Linux VFS `PATH_MAX` of 4096. diff --git a/crates/composefs/src/mount.rs b/crates/composefs/src/mount.rs index c485cefb..335040a9 100644 --- a/crates/composefs/src/mount.rs +++ b/crates/composefs/src/mount.rs @@ -126,6 +126,18 @@ pub fn erofs_mount(image: OwnedFd) -> Result { )?) } +/// Controls fs-verity enforcement for overlay file data. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub enum VerityRequirement { + /// Do not require fs-verity. 
+ #[default] + Disabled, + /// Require fs-verity; fail if the kernel does not support it. + Required, + /// Try to enable fs-verity; silently continue if unsupported. + Try, +} + /// Options controlling how a composefs image is mounted. #[derive(Debug, Default)] #[non_exhaustive] @@ -133,6 +145,8 @@ pub struct MountOptions { /// Overlay upper layer and work directory: (upperdir, workdir). upperdirs: Option<(OwnedFd, OwnedFd)>, read_write: bool, + /// User namespace file descriptor for ID-mapped mounts. + idmap_fd: Option, } impl MountOptions { @@ -147,20 +161,26 @@ impl MountOptions { self.read_write = read_write; self } + + /// Set a user namespace file descriptor for ID-mapped mounts. + pub fn set_idmap(&mut self, fd: OwnedFd) -> &mut Self { + self.idmap_fd = Some(fd); + self + } } -/// Creates a composefs mount using overlayfs with an erofs image and base directory. +/// Creates a composefs mount using overlayfs with an erofs image and base directories. /// /// This mounts a composefs image by creating an overlayfs that layers the erofs image -/// (as the lower layer) over a base directory (as the data layer). The overlayfs is +/// (as the lower layer) over base directories (as data layers). The overlayfs is /// configured with metacopy and redirect_dir enabled for composefs functionality. 
/// /// # Arguments /// /// * `image` - File descriptor for the composefs erofs image /// * `name` - Name for the mount source (appears as "composefs:{name}") -/// * `basedir` - File descriptor for the base directory containing the actual file data -/// * `enable_verity` - Whether to require fs-verity verification for all files +/// * `basedirs` - File descriptors for the base directories containing actual file data +/// * `verity` - Whether and how to enforce fs-verity verification for overlay files /// * `options` - Mount options controlling overlay and read-write behaviour /// /// # Returns @@ -170,24 +190,38 @@ impl MountOptions { pub fn composefs_fsmount( image: OwnedFd, name: &str, - basedir: impl AsFd, - enable_verity: bool, + basedirs: &[BorrowedFd<'_>], + verity: VerityRequirement, options: &MountOptions, ) -> Result { - let erofs_mnt = prepare_mount(erofs_mount(image)?)?; + let erofs_mnt = erofs_mount(image)?; + if let Some(idmap_fd) = &options.idmap_fd { + composefs_ioctls::mount::mount_setattr_idmap(erofs_mnt.as_fd(), idmap_fd.as_fd())?; + } + let erofs_mnt = prepare_mount(erofs_mnt)?; let overlayfs = FsHandle::open("overlay")?; fsconfig_set_string(overlayfs.as_fd(), "source", format!("composefs:{name}"))?; fsconfig_set_string(overlayfs.as_fd(), "metacopy", "on")?; fsconfig_set_string(overlayfs.as_fd(), "redirect_dir", "on")?; - if enable_verity { - fsconfig_set_string(overlayfs.as_fd(), "verity", "require")?; + match verity { + VerityRequirement::Disabled => {} + VerityRequirement::Required => { + fsconfig_set_string(overlayfs.as_fd(), "verity", "require")?; + } + VerityRequirement::Try => { + match fsconfig_set_string(overlayfs.as_fd(), "verity", "require") { + Ok(()) => {} + Err(rustix::io::Errno::INVAL) | Err(rustix::io::Errno::NOSYS) => {} + Err(e) => return Err(e.into()), + } + } } if let Some((upperdir, workdir)) = &options.upperdirs { overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upperdir.as_fd())?; overlayfs_set_fd(overlayfs.as_fd(), 
"workdir", workdir.as_fd())?; } - overlayfs_set_lower_and_data_fds(&overlayfs, &erofs_mnt, Some(&basedir))?; + overlayfs_set_lower_and_data_fds(&overlayfs, &erofs_mnt, basedirs)?; fsconfig_create(overlayfs.as_fd())?; let mount_attr = if options.read_write { diff --git a/crates/composefs/src/mountcompat.rs b/crates/composefs/src/mountcompat.rs index 292ad539..2ab14fe5 100644 --- a/crates/composefs/src/mountcompat.rs +++ b/crates/composefs/src/mountcompat.rs @@ -35,11 +35,11 @@ pub fn overlayfs_set_fd(fs_fd: BorrowedFd, key: &str, fd: BorrowedFd) -> rustix: pub fn overlayfs_set_lower_and_data_fds( fs_fd: impl AsFd, lower: impl AsFd, - data: Option, + data_fds: &[BorrowedFd<'_>], ) -> rustix::io::Result<()> { overlayfs_set_fd(fs_fd.as_fd(), "lowerdir+", lower.as_fd())?; - if let Some(data) = data { - overlayfs_set_fd(fs_fd.as_fd(), "datadir+", data.as_fd())?; + for data_fd in data_fds { + overlayfs_set_fd(fs_fd.as_fd(), "datadir+", *data_fd)?; } Ok(()) } @@ -84,17 +84,16 @@ pub fn overlayfs_set_fd(fs_fd: BorrowedFd, key: &str, fd: BorrowedFd) -> rustix: pub fn overlayfs_set_lower_and_data_fds( fs_fd: impl AsFd, lower: impl AsFd, - data: Option, + data_fds: &[BorrowedFd<'_>], ) -> rustix::io::Result<()> { use std::os::fd::AsRawFd; let lower_fd = lower.as_fd().as_raw_fd().to_string(); - let arg = if let Some(data) = data { - let data_fd = data.as_fd().as_raw_fd().to_string(); - format!("/proc/self/fd/{lower_fd}::/proc/self/fd/{data_fd}") - } else { - format!("/proc/self/fd/{lower_fd}") - }; + let mut arg = format!("/proc/self/fd/{lower_fd}"); + for data_fd in data_fds { + let raw = data_fd.as_raw_fd().to_string(); + arg.push_str(&format!("::/proc/self/fd/{raw}")); + } rustix::mount::fsconfig_set_string(fs_fd.as_fd(), "lowerdir", arg) } diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index 915e2302..be6d7f42 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -113,7 +113,7 @@ use crate::{ 
FsVerityHasher, MeasureVerityError, compute_verity, enable_verity_maybe_copy, ensure_verity_equal, has_verity, measure_verity, measure_verity_opt, }, - mount::{MountOptions, composefs_fsmount, mount_at}, + mount::{MountOptions, VerityRequirement, composefs_fsmount, mount_at}, shared_internals::IO_BUF_CAPACITY, splitstream::{SplitStreamReader, SplitStreamWriter}, util::{ErrnoFilter, proc_self_fd, reopen_tmpfile_ro, replace_symlinkat}, @@ -2524,16 +2524,17 @@ impl Repository { #[context("Mounting image '{name}'")] pub fn mount_with_options(&self, name: &str, options: &MountOptions) -> Result { let (image, enable_verity) = self.open_image(name)?; + let objects = self + .objects_dir() + .context("Getting objects directory for mount")?; + let verity = if enable_verity { + VerityRequirement::Required + } else { + VerityRequirement::Disabled + }; - composefs_fsmount( - image, - name, - self.objects_dir() - .context("Getting objects directory for mount")?, - enable_verity, - options, - ) - .context("Creating filesystem mount") + composefs_fsmount(image, name, &[objects.as_fd()], verity, options) + .context("Creating filesystem mount") } /// Create a detached read-only mount of an image. diff --git a/crates/composefs/src/tree.rs b/crates/composefs/src/tree.rs index ddfc61bf..59b2ee32 100644 --- a/crates/composefs/src/tree.rs +++ b/crates/composefs/src/tree.rs @@ -17,7 +17,15 @@ pub enum RegularFile { /// File stored externally, referenced by fsverity hash and size. /// /// The tuple contains (fsverity hash, file size in bytes). + /// The fsverity digest is embedded in the overlay metacopy xattr. External(ObjectID, u64), + /// Like `External`, but without embedding the fsverity digest in the + /// overlay metacopy xattr. Used by the C API when the caller set a + /// content-address payload but did not explicitly set a verified digest. + ExternalNoVerity(ObjectID, u64), + /// File with declared size but no content or external reference. 
+ /// Produces ChunkBased layout with null chunk indices. + Sparse(u64), } // Re-export generic types. Note that we don't need to re-write diff --git a/crates/composefs/tests/mkfs.rs b/crates/composefs/tests/mkfs.rs index da38b57a..9efd5936 100644 --- a/crates/composefs/tests/mkfs.rs +++ b/crates/composefs/tests/mkfs.rs @@ -249,6 +249,37 @@ fn test_erofs_v1_digest_stability() { } } +/// Test that V0 with a whiteout (char device rdev=0) round-trips correctly. +/// Whiteouts are escaped to regular files in both V0 and V1 Epoch1 formats. +#[test] +fn test_v0_whiteout_round_trip() { + let mut fs = FileSystem::::new(default_stat()); + add_leaf(&mut fs, "whiteout", LeafContent::CharacterDevice(0)); + add_leaf( + &mut fs, + "regular", + LeafContent::Regular(RegularFile::Inline((*b"hello").into())), + ); + + let image = mkfs_erofs_versioned( + &mut ValidatedFileSystem::new(fs).unwrap(), + FormatVersion::V0, + ); + + // The image must be parseable + let rt_fs = + composefs::erofs::reader::erofs_to_filesystem::(&image[..]).unwrap(); + + // The whiteout should round-trip as CharacterDevice(0) + let mut dump_bytes = Vec::new(); + composefs::dumpfile::write_dumpfile(&mut dump_bytes, &rt_fs).unwrap(); + let dump = String::from_utf8(dump_bytes).unwrap(); + assert!( + dump.contains("/whiteout"), + "whiteout entry missing from dump:\n{dump}" + ); +} + /// Test that `--min-version=1` forces `composefs_version=1` in the EROFS header /// even when no user-visible whiteout devices are present, matching C mkcomposefs /// `--min-version=1 --max-version=1` behaviour. 
diff --git a/man/composefs-dump.md b/man/composefs-dump.md new file mode 100644 index 00000000..07ec3d27 --- /dev/null +++ b/man/composefs-dump.md @@ -0,0 +1,120 @@ +% composefs-dump 5 "" composefs "User Commands" + +# NAME + +composefs-dump - textual file format for composefs content + +# DESCRIPTION + +Both the *composefs-info* and the *mkcomposefs* commands support +generation/consumption of a textual description of the contents of a +composefs image. This can be used to inspect or modify an image, or to +generate an image without having to have a local directory with the +files in it. + +The file format is very simple, with one file per line, first with +11 fixed fields, followed by a variable number of extended attributes +for the file. It is recommended, but not required, to include a trailing +newline for the last file. + +Fields are separated by a single space, and lines by a single +newline. Extended attributes further use '=' to separate key from +value. Therefore all these characters, as well as non-printable +characters are escaped in the fields ('=' only in xattr fields). +Also, back-slashes have to be escaped as they are used as the +escape mechanism. + +Escapes are of the form \xXY which escapes a single byte using two hex +digits. For example \x00 is the zero byte and \xff is the 255 byte. +Optionally, these custom escapes are supported: + + **\\\\** + : backslash. + + **\\n** + : newline. + + **\\r** + : carriage return. + + **\\t** + : tab + + +Optional fields that are not set contain '-', and if a field actually +has that particular value it is escaped. + +The fixed fields on a line are (all numbers in base 10 unless +otherwise specified): + +**PATH** +: The full, absolute path of the file in the image. Any directories + used as prefix in the path must have been in the file before this + line. + +**SIZE** +: The size of the file. This is ignored for directories. 
+ +**MODE** +: The st_mode stat field of the file in octal, which includes both the + permissions and the file type. + + Additionally, if the file is a hardlink, then this field will + start with a single '@' character, and the payload field points + to the target file. Note that all other fields are typically + filled out for a hardlink as the target, but for generation + of a new file we ignore all the fields except the payload. + +**NLINK** +: The st_nlink stat field. + +**UID** +: The owner uid. + +**GID** +: The owner gid. + +**RDEV** +: The st_rdev stat field. + +**MTIME** +: The modification time in seconds and nanoseconds since the unix + epoch, separated by '.'. Note this is not a float, "1.1" means + one second and one nanosecond. + +**PAYLOAD** +: The payload of the file. For symbolic links this means the symlink + target. For regular files this is the relative pathname for the + backing files. For hardlinks (see **MODE**), this is the path of + another file in this file that this is a hardlink of. + +**CONTENT** +: Small files can inline the actual content in the composefs + image. This contains an escaped version of the content. + This must match the size specified in **SIZE**. + +**DIGEST** +: A fs-verity digest for the file (only used for regular files, and + not if *CONTENT* is set) that will be validated against backing + files when used. + +After the fixed fields come the xattrs, escaped and space-separated in the form +**KEY**=**VALUE**. Note that '=' must be escaped in **KEY**. 
+ + +# EXAMPLE + +``` +/ 4096 40755 4 1000 1000 0 1695372970.944925700 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a\x20dir\x20w\x20space 27 40755 2 1000 1000 0 1694598852.869646118 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-dir 45 40755 2 1000 1000 0 1674041780.601887980 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-dir/a-file 259 100644 1 1000 1000 0 1695368732.385062094 35/d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-hardlink 259 @100644 1 1000 1000 0 1695368732.385062094 /a-dir/a-file - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/inline.txt 10 100644 1 1000 1000 0 1697019909.446146440 - some-text\n - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +``` + +# SEE ALSO + +**composefs-info(1)**, **mkcomposefs(1)** + +[composefs upstream](https://github.com/composefs/composefs-rs) diff --git a/man/composefs-info.md b/man/composefs-info.md new file mode 100644 index 00000000..89a84b5f --- /dev/null +++ b/man/composefs-info.md @@ -0,0 +1,59 @@ +% composefs-info 1 "" composefs "User Commands" + +# NAME + +composefs-info - print information about a composefs image + +# SYNOPSIS +**composefs-info** [ls|objects|missing-objects|dump] *IMAGE* [*IMAGE2* *IMAGE3* ...] + +# DESCRIPTION + +The composefs-info command lets you inspect a composefs image. It has +several sub-commands: + +**ls** +: Prints a simple list of the files and directories in the images as + well as their backing file or symlink target. + +**objects** +: Prints a list of all the backing files referenced by the images, + in sorted order. 
+ +**missing-objects** +: Prints a list of all the missing backing files referenced by the + images, in sorted order, given a backing file store passed in + using the --basedir option. + +**dump** +: Prints a full dump of the images in a line-based textual format. + See **composefs-dump(5)** for more details. This format is also + accepted as input to mkcomposefs if the --from-file + option is used. + +**measure-file** +: Interpret the provided paths as generic files, and print their fsverity digest. + +# OPTIONS + +The provided *IMAGE* argument must be a composefs file. Multiple images +can be specified. + +**composefs-info** accepts the following options: + + +**\-\-basedir**=*PATH* +: This should point to a directory of backing files, and will be used + by the **missing-objects** command to know what files are available. + +**\-\-filter**=*NAME* +: Only print entries whose name matches one of these. Can be specified + multiple times. Cannot be a full path (e.g. cannot contain `/`). + This is intended to be used for tooling to efficiently look up metadata + files embedded in the image without loading and printing the entire + image. + +# SEE ALSO +**mkcomposefs(1)**, **composefs-dump(5)** + +[composefs upstream](https://github.com/composefs/composefs-rs) diff --git a/man/mkcomposefs.md b/man/mkcomposefs.md new file mode 100644 index 00000000..4c909200 --- /dev/null +++ b/man/mkcomposefs.md @@ -0,0 +1,131 @@ +% mkcomposefs 1 "" composefs "User Commands" + +# NAME + +mkcomposefs - create a composefs filesystem image + +# SYNOPSIS +**mkcomposefs** *SOURCE* *IMAGE* + +# DESCRIPTION + +The composefs project uses an EROFS image file to store metadata, and one +or more separate directories containing content-addressed backing data +for regular files. + +**mkcomposefs** constructs the mountable "composefs image" using the +source as input. It can also create the backing store directory. +Typically the source is a directory, but with *--from-file* it can +also be a file. 
+ +# OPTIONS + +The provided *SOURCEDIR* argument must be a directory and its entire +contents will be read recursively. The provided *IMAGE* argument +will be a mountable composefs image. + +**mkcomposefs** accepts the following options: + + +**\-\-digest-store**=*PATH* +: This path will become a composefs "object store". Regular files + larger than 64 bytes in the *SOURCEDIR* will be copied (reflinked + if possible) into this target directory, named after their + fsverity digest. Small files will be inlined into the image + metadata. If possible, the added files will have fs-verity + enabled. + + This directory should be passed to the basedir option when you + mount the image. + +**\-\-print-digest** +: Print the fsverity digest of the composefs metadata file. + +**\-\-print-digest-only** +: Print the fsverity digest of the composefs metadata file, but + don't write the image. If this is passed, the *IMAGE* argument should + be left out. + +**\-\-use-epoch** +: Use a zero time (unix epoch) as the modification time for all files. + +**\-\-skip-devices** +: Don't add device nodes to the image. + +**\-\-skip-xattrs** +: Don't add xattrs to files in the image. + +**\-\-user-xattrs** +: Only add xattrs with the "user." prefix to files in the image. + +**\-\-from-file** +: The source is a file in the **composefs-dump(5)** format. If + the specified file is "-", the data is read from stdin. + +**\-\-version** +: The base version to use for the image format. + +**\-\-max-version** +: If this specifies a version higher than \-\-version, then the + actual image format version used will be adjusted upwards if that + is beneficial for the image, up to the max version. + +**\-\-threads**=*count* +: Number of threads to be used to calculate the file digests and copy. + Default thread count is the number of processors when *--threads* is not specified. 
+ +# FORMAT VERSIONING + +Composefs images are binary reproducible, meaning that for a given +input the result is always the same, giving the same digest of the +image. This is important as the digest is used to validate the image, +even if the image was re-created rather than transferred as +is. However, sometimes the format needs to be changed, such as for +example when a new type of file is introduced or a bug is fixed. This +is handled by introducing a format version. + +Specifying the version is done with two options, the base version +(\-\-version) and the max version (\-\-max-version). When building an +image, mkcomposefs tries to keep the image format version as low as possible, +but if some particular requested feature is not available with the +base version, but is accessible in the max version then the version +used will be increased. This allows us to introduce new features and +fix bugs in a later version and migrate to that using max versions, +but still keeping the digests identical for unaffected images. + +If you need 100% binary reproducibility over time, specify the same +version and a max version each time. + +Format version history: + +- 0 - Initial version +- 1 - Supports overlay whiteout files in the image (added in 1.0.3) + +The default if no version arguments are specified is version 0 and max +version 1. + +# SANDBOXING RECOMMENDATIONS + +This tool essentially just processes an input directory or +text file and writes a file. It does not require any privileges +at all. If you are invoking this as part of an otherwise privileged +process (such as a container runtime) we recommend dropping privileges +before invoking `mkcomposefs`. + +Especially if you are using `--from-file`, you can run this +as an unprivileged uid that has no writable filesystem access at +all except to a passed file descriptor. 
For example, you can pass +a writable file descriptor for the desired target file as fd 3, +and run `mkcomposefs --from-file - /proc/self/fd/3` +to effectively use `mkcomposefs` as part of a pipeline. + +A simple sandboxing example starting from root is `setpriv --nnp --reuid nobody -- mkcomposefs ...`. +Another is to use `systemd-run -P DynamicUser=yes -P ProtectSystem=strict`. +Yet another (especially if your code is already part of a container runtime) +is to use that runtime's existing functionality (seccomp, unsharing user namespace, +mounting a restricted subset of the rootfs, etc.). + +# SEE ALSO +**composefs-info(1)**, **mount.composefs(8)**, **composefs-dump(5)** + +[composefs upstream](https://github.com/composefs/composefs-rs) diff --git a/man/mount.composefs.md b/man/mount.composefs.md new file mode 100644 index 00000000..b54e09bd --- /dev/null +++ b/man/mount.composefs.md @@ -0,0 +1,73 @@ +% mount.composefs 8 "" composefs "User Commands" + +# NAME + +mount.composefs - mount a composefs filesystem image + +# SYNOPSIS + +**mount.composefs** [-o OPTIONS] *IMAGE* *TARGETDIR* + +# DESCRIPTION + +The composefs project uses an EROFS image file to store metadata, and one +or more separate directories containing content-addressed backing data +for regular files. + +**mount.composefs** mounts such an EROFS file in combination with a given +set of basedirs at the specified location. It can be called directly, or +as a mount helper by running `mount -t composefs ...`. + +# OPTIONS + +The provided *IMAGE* argument must be a valid composefs (EROFS) +metadata image. The *TARGETDIR* will be used as a mount target. + +**mount.composefs** accepts the following colon-separated mount +options when passed via the `-o OPTIONS` argument. + +**basedir**=*PATH* +: This path will be used to resolve non-empty file references + stored in the composefs metadata image. A primary use case is to have + this be the same path provided to `mkcomposefs --digest-store=PATH`. 
+ + Multiple paths can be specified, separated by `:`. + +**digest**=*DIGEST* +: The image file is validated to have the specified fs-verity digest + before being used. This allows a chain of trust that ensures only + the expected data is ever visible in the mount. + + This option also implies **verity**. + +**idmap**=*PATH* +: Specify a path to a user namespace whose ID mapping should be used. + The typical format for this type of path is `/proc/<PID>/ns/user`. + +**verity** +: If this is specified, all files in the *IMAGE* must specify an fs-verity + digest, and all the files in the base dirs must have a matching fs-verity + digest. + + Note: This needs support for the overlayfs "verity" option in the + kernel, which was added in 6.6rc1. + +**ro** +: Mounts the filesystem read-only. This is mainly useful when using + **upperdir** as unlayered composefs images are naturally readonly. + +**rw** +: Overrides a previous **ro** option. + +**upperdir** +: Specify an upper dir in the overlayfs mount that composefs uses. This allows + a writable layer on top of the composefs image. See overlayfs docs for details. + +**workdir** +: Specifies an overlayfs workdir to go with **upperdir**. + +# SEE ALSO + +**composefs-info(1)**, **mkcomposefs(1)** + +[composefs upstream](https://github.com/composefs/composefs-rs)