From 6caf05fe4b7d0e3449aa37b7b37d526167f68329 Mon Sep 17 00:00:00 2001 From: Allison Karlitskaya Date: Fri, 15 Nov 2024 16:54:30 +0100 Subject: [PATCH] fs: Add code to import from a filesystem src/fs.rs contains code for writing the in-memory filesystem tree to a directory on disk, so let's add the other direction: converting an on-disk directory to an in-memory filesystem tree. This will let us scan container images from inside containers. This is necessary because we can't get access to the OCI layer tarballs during a container build (even from a later stage in a multi-stage build) but we can bindmount the root filesystem. See https://github.com/containers/buildah/issues/5837 With our recent changes to how we handle metadata on the root directory we should now be producing the same image on the inside and the outside, which gives us a nice way to produce a UKI with a built-in `composefs=` command-line parameter. Add a new 'unified' example. This does the container build as a single `podman build` command with no special arguments. 
Closes #34 --- examples/unified/.gitignore | 5 + examples/unified/Containerfile | 48 ++++ examples/unified/build | 35 +++ examples/unified/empty | 0 examples/unified/extra/etc/resolv.conf | 1 + .../lib/dracut/dracut.conf.d/37composefs.conf | 6 + .../composefs-pivot-sysroot.service | 34 +++ .../modules.d/37composefs/module-setup.sh | 20 ++ .../kernel/install.conf.d/37composefs.conf | 2 + .../usr/lib/systemd/network/37-wired.network | 9 + .../37-composefs.conf | 6 + examples/unified/fix-verity | 59 +++++ examples/unified/make-image | 19 ++ examples/unified/repart.d/01-esp.conf | 6 + examples/unified/repart.d/02-sysroot.conf | 6 + examples/unified/run | 12 + src/bin/cfsctl.rs | 17 +- src/fs.rs | 235 +++++++++++++++++- 18 files changed, 514 insertions(+), 6 deletions(-) create mode 100644 examples/unified/.gitignore create mode 100644 examples/unified/Containerfile create mode 100755 examples/unified/build create mode 100644 examples/unified/empty create mode 120000 examples/unified/extra/etc/resolv.conf create mode 100644 examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf create mode 100644 examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service create mode 100755 examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh create mode 100644 examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf create mode 100644 examples/unified/extra/usr/lib/systemd/network/37-wired.network create mode 100644 examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf create mode 100755 examples/unified/fix-verity create mode 100755 examples/unified/make-image create mode 100644 examples/unified/repart.d/01-esp.conf create mode 100644 examples/unified/repart.d/02-sysroot.conf create mode 100755 examples/unified/run diff --git a/examples/unified/.gitignore b/examples/unified/.gitignore new file mode 100644 index 0000000..acef31e --- /dev/null +++ 
b/examples/unified/.gitignore @@ -0,0 +1,5 @@ +/cfsctl +/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot +/fix-verity.efi +/image.qcow2 +/tmp/ diff --git a/examples/unified/Containerfile b/examples/unified/Containerfile new file mode 100644 index 0000000..3073ed5 --- /dev/null +++ b/examples/unified/Containerfile @@ -0,0 +1,48 @@ +# Need 6.12 kernel from rawhide +FROM fedora:rawhide AS base +COPY extra / +COPY cfsctl /usr/bin +RUN --mount=type=cache,target=/var/cache/libdnf5 < /etc/kernel/cmdline +EOF +RUN --mount=type=cache,target=/var/cache/libdnf5 < tmp/efi/loader/loader.conf +mkdir -p tmp/efi/EFI/BOOT tmp/efi/EFI/systemd +cp /usr/lib/systemd/boot/efi/systemd-bootx64.efi tmp/efi/EFI/systemd +cp /usr/lib/systemd/boot/efi/systemd-bootx64.efi tmp/efi/EFI/BOOT/BOOTX64.EFI +${CFSCTL} oci prepare-boot "${IMAGE_ID}" tmp/efi + +fakeroot ./make-image +qemu-img convert -f raw tmp/image.raw -O qcow2 image.qcow2 +./fix-verity image.qcow2 # https://github.com/tytso/e2fsprogs/issues/201 diff --git a/examples/unified/empty b/examples/unified/empty new file mode 100644 index 0000000..e69de29 diff --git a/examples/unified/extra/etc/resolv.conf b/examples/unified/extra/etc/resolv.conf new file mode 120000 index 0000000..697ba64 --- /dev/null +++ b/examples/unified/extra/etc/resolv.conf @@ -0,0 +1 @@ +../run/systemd/resolve/stub-resolv.conf \ No newline at end of file diff --git a/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf b/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf new file mode 100644 index 0000000..1defe5d --- /dev/null +++ b/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf @@ -0,0 +1,6 @@ +# we want to make sure the virtio disk drivers get included +hostonly=no + +# we need to force these in via the initramfs because we don't have modules in +# the base image +force_drivers+=" virtio_net vfat " diff --git 
a/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service new file mode 100644 index 0000000..3ba0562 --- /dev/null +++ b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service @@ -0,0 +1,34 @@ +# Copyright (C) 2013 Colin Walters +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <https://www.gnu.org/licenses/>. 
+ +[Unit] +DefaultDependencies=no +ConditionKernelCommandLine=composefs +ConditionPathExists=/etc/initrd-release +After=sysroot.mount +Requires=sysroot.mount +Before=initrd-root-fs.target +Before=initrd-switch-root.target + +OnFailure=emergency.target +OnFailureJobMode=isolate + +[Service] +Type=oneshot +ExecStart=/usr/bin/composefs-pivot-sysroot +StandardInput=null +StandardOutput=journal +StandardError=journal+console +RemainAfterExit=yes diff --git a/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh new file mode 100755 index 0000000..c4186c6 --- /dev/null +++ b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash + +check() { + return 0 +} + +depends() { + return 0 +} + +install() { + inst \ + "${moddir}/composefs-pivot-sysroot" /bin/composefs-pivot-sysroot + inst \ + "${moddir}/composefs-pivot-sysroot.service" \ + "${systemdsystemunitdir}/composefs-pivot-sysroot.service" + + $SYSTEMCTL -q --root "${initdir}" add-wants \ + 'initrd-root-fs.target' 'composefs-pivot-sysroot.service' +} diff --git a/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf b/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf new file mode 100644 index 0000000..4d12c4e --- /dev/null +++ b/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf @@ -0,0 +1,2 @@ +layout = uki +uki_generator = ukify diff --git a/examples/unified/extra/usr/lib/systemd/network/37-wired.network b/examples/unified/extra/usr/lib/systemd/network/37-wired.network new file mode 100644 index 0000000..e4e05fd --- /dev/null +++ b/examples/unified/extra/usr/lib/systemd/network/37-wired.network @@ -0,0 +1,9 @@ +[Match] +Type=ether + +[Link] +RequiredForOnline=routable + +[Network] +DHCP=yes + diff --git a/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf 
b/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf new file mode 100644 index 0000000..c387c18 --- /dev/null +++ b/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf @@ -0,0 +1,6 @@ +# Make sure we grow the right root filesystem + +[Service] +ExecStart= +ExecStart=/usr/lib/systemd/systemd-growfs /sysroot + diff --git a/examples/unified/fix-verity b/examples/unified/fix-verity new file mode 100755 index 0000000..783a49a --- /dev/null +++ b/examples/unified/fix-verity @@ -0,0 +1,59 @@ +#!/bin/sh + +# workaround for https://github.com/tytso/e2fsprogs/issues/201 + +set -eux + +# We use a custom UKI with an initramfs containing a script that remounts +# /sysroot read-write and enables fs-verity on all of the objects in +# /composefs/objects. +# +# The first time we're run (or if we are modified) we (re-)generate the UKI. +# This is done inside of a container (for independence from the host OS). + +image_file="$1" + +if [ "$0" -nt fix-verity.efi ]; then + podman run --rm -i fedora > tmp/fix-verity.efi <<'EOF' + set -eux + + cat > /tmp/fix-verity.sh <<'EOS' + mount -o remount,rw /sysroot + ( + cd /sysroot/composefs/objects + echo >&2 'Enabling fsverity on composefs objects' + for i in */*; do + fsverity enable $i; + done + echo >&2 'done!' 
+ ) + umount /sysroot + sync + poweroff -ff +EOS + + ( + dnf --setopt keepcache=1 install -y \ + kernel binutils systemd-boot-unsigned btrfs-progs fsverity-utils + dracut \ + --uefi \ + --no-hostonly \ + --install 'sync fsverity' \ + --include /tmp/fix-verity.sh /lib/dracut/hooks/pre-pivot/fix-verity.sh \ + --kver "$(rpm -q kernel-core --qf '%{VERSION}-%{RELEASE}.%{ARCH}')" \ + --kernel-cmdline="root=PARTLABEL=root-x86-64 console=ttyS0" \ + /tmp/fix-verity.efi + ) >&2 + + cat /tmp/fix-verity.efi +EOF + mv tmp/fix-verity.efi fix-verity.efi +fi + +qemu-system-x86_64 \ + -nographic \ + -m 4096 \ + -enable-kvm \ + -bios /usr/share/edk2/ovmf/OVMF_CODE.fd \ + -drive file="$1",if=virtio,media=disk \ + -kernel fix-verity.efi diff --git a/examples/unified/make-image b/examples/unified/make-image new file mode 100755 index 0000000..ff05a0f --- /dev/null +++ b/examples/unified/make-image @@ -0,0 +1,19 @@ +#!/bin/sh + +set -eux + +chown -R 0:0 tmp/sysroot +chcon -R system_u:object_r:usr_t:s0 tmp/sysroot/composefs +chcon system_u:object_r:var_t:s0 tmp/sysroot/var + +> tmp/image.raw +SYSTEMD_REPART_MKFS_OPTIONS_EXT4='-O verity' \ + systemd-repart \ + --empty=require \ + --size=auto \ + --dry-run=no \ + --no-pager \ + --offline=yes \ + --root=tmp \ + --definitions=repart.d \ + tmp/image.raw diff --git a/examples/unified/repart.d/01-esp.conf b/examples/unified/repart.d/01-esp.conf new file mode 100644 index 0000000..67f93e1 --- /dev/null +++ b/examples/unified/repart.d/01-esp.conf @@ -0,0 +1,6 @@ +[Partition] +Type=esp +Format=vfat +CopyFiles=/efi:/ +SizeMinBytes=512M +SizeMaxBytes=512M diff --git a/examples/unified/repart.d/02-sysroot.conf b/examples/unified/repart.d/02-sysroot.conf new file mode 100644 index 0000000..65f289e --- /dev/null +++ b/examples/unified/repart.d/02-sysroot.conf @@ -0,0 +1,6 @@ +[Partition] +Type=root +Format=ext4 +SizeMinBytes=10G +SizeMaxBytes=10G +CopyFiles=/sysroot:/ diff --git a/examples/unified/run b/examples/unified/run new file mode 100755 index 
0000000..5742835 --- /dev/null +++ b/examples/unified/run @@ -0,0 +1,12 @@ +#!/bin/sh + +set -eux + +cd "${0%/*}" + +qemu-system-x86_64 \ + -m 4096 \ + -enable-kvm \ + -bios /usr/share/edk2/ovmf/OVMF_CODE.fd \ + -drive file=image.qcow2,if=virtio,cache=unsafe \ + -nic user,model=virtio-net-pci diff --git a/src/bin/cfsctl.rs b/src/bin/cfsctl.rs index 2437b2b..d62a03e 100644 --- a/src/bin/cfsctl.rs +++ b/src/bin/cfsctl.rs @@ -73,7 +73,9 @@ enum Command { /// Perform garbage collection GC, /// Imports a composefs image (unsafe!) - ImportImage { reference: String }, + ImportImage { + reference: String, + }, /// Commands for dealing with OCI layers Oci { #[clap(subcommand)] @@ -86,6 +88,12 @@ enum Command { /// the mountpoint mountpoint: String, }, + CreateImage { + path: PathBuf, + }, + CreateDumpfile { + path: PathBuf, + }, } fn main() -> Result<()> { @@ -165,6 +173,13 @@ fn main() -> Result<()> { oci::prepare_boot(&repo, name, None, &output)?; } }, + Command::CreateImage { ref path } => { + let image_id = composefs::fs::create_image(path, Some(&repo))?; + println!("{}", hex::encode(image_id)); + } + Command::CreateDumpfile { ref path } => { + composefs::fs::create_dumpfile(path)?; + } Command::Mount { name, mountpoint } => { repo.mount(&name, &mountpoint)?; } diff --git a/src/fs.rs b/src/fs.rs index cab30ff..5e7b08c 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,16 +1,31 @@ -use std::{ffi::OsStr, mem::MaybeUninit, path::Path}; +use std::{ + cell::RefCell, + collections::{BTreeMap, HashMap}, + ffi::OsString, + ffi::{CStr, OsStr}, + mem::MaybeUninit, + os::unix::ffi::{OsStrExt, OsStringExt}, + path::Path, + rc::Rc, +}; -use anyhow::Result; +use anyhow::{bail, ensure, Result}; use rustix::{ - fd::OwnedFd, - fs::{fdatasync, linkat, mkdirat, mknodat, openat, symlinkat, AtFlags, FileType, OFlags, CWD}, + fd::{AsFd, OwnedFd}, + fs::{ + fdatasync, fstat, getxattr, linkat, listxattr, mkdirat, mknodat, openat, readlinkat, + symlinkat, AtFlags, Dir, FileType, Mode, OFlags, CWD, + 
}, io::{read_uninit, write, Errno}, }; use crate::{ - image::{DirEnt, Directory, Inode, Leaf, LeafContent, Stat}, + fsverity::{digest::FsVerityHasher, Sha256HashValue}, + image::{DirEnt, Directory, FileSystem, Inode, Leaf, LeafContent, Stat}, repository::Repository, + selabel::selabel, util::proc_self_fd, + INLINE_CONTENT_MAX, }; fn set_file_contents(dirfd: &OwnedFd, name: &OsStr, stat: &Stat, data: &[u8]) -> Result<()> { @@ -97,7 +112,217 @@ fn write_directory_contents(dir: &Directory, fd: &OwnedFd, repo: &Repository) -> Ok(()) } +// NB: hardlinks not supported pub fn write_to_path(repo: &Repository, dir: &Directory, output_dir: &Path) -> Result<()> { let fd = openat(CWD, output_dir, OFlags::PATH | OFlags::DIRECTORY, 0.into())?; write_directory_contents(dir, &fd, repo) } + +pub struct FilesystemReader<'repo> { + st_dev: u64, + repo: Option<&'repo Repository>, + inodes: HashMap<u64, Rc<Leaf>>, + root_mtime: i64, +} + +impl<'repo> FilesystemReader<'repo> { + fn read_xattrs(&mut self, fd: &OwnedFd) -> Result<BTreeMap<Box<OsStr>, Box<[u8]>>> { + // flistxattr() and fgetxattr() don't work with O_PATH fds, so go via /proc/self/fd. Note: + // we want the symlink-following version of this call, which produces the correct behaviour + // even when trying to read xattrs from symlinks themselves. 
See + // https://gist.github.com/allisonkarlitskaya/7a80f2ebb3314d80f45c653a1ba0e398 + let filename = proc_self_fd(fd); + + let mut xattrs = BTreeMap::new(); + + let names_size = listxattr(&filename, &mut [])?; + let mut names = vec![0; names_size]; + let actual_names_size = listxattr(&filename, &mut names)?; + ensure!( + actual_names_size == names.len(), + "xattrs changed during read" + ); + + let names: Vec<u8> = names.into_iter().map(|c| c as u8).collect(); // fml + + let mut buffer = [0; 65536]; + for name in names.split_inclusive(|c| *c == 0) { + let name = CStr::from_bytes_with_nul(name)?; + let value_size = getxattr(&filename, name, &mut buffer)?; + let key = Box::from(OsStr::from_bytes(name.to_bytes())); + let value = Box::from(&buffer[..value_size]); + xattrs.insert(key, value); + } + + Ok(xattrs) + } + + fn stat(&mut self, fd: &OwnedFd, ifmt: FileType) -> Result<(rustix::fs::Stat, Stat)> { + let buf = fstat(fd)?; + + ensure!( + FileType::from_raw_mode(buf.st_mode) == ifmt, + "File type changed + between readdir() and fstat()" + ); + + let mtime = buf.st_mtime as i64; + + if buf.st_dev != self.st_dev { + if self.st_dev == u64::MAX { + self.st_dev = buf.st_dev; + } else { + bail!("Attempting to cross devices while importing filesystem"); + } + } else { + // The root mtime is equal to the most recent mtime of any inode *except* the root + // directory. Because self.st_dev is unset at first, we know we're in this branch only + // if this is the second (or later) inode we process (ie: not the root directory). 
+ if mtime > self.root_mtime { + self.root_mtime = mtime; + } + } + + Ok(( + buf, + Stat { + st_mode: buf.st_mode & 0o7777, + st_uid: buf.st_uid, + st_gid: buf.st_gid, + st_mtim_sec: mtime, + xattrs: RefCell::new(self.read_xattrs(fd)?), + }, + )) + } + + fn read_leaf_content(&mut self, fd: OwnedFd, buf: rustix::fs::Stat) -> Result<LeafContent> { + let content = match FileType::from_raw_mode(buf.st_mode) { + FileType::Directory | FileType::Unknown => unreachable!(), + FileType::RegularFile => { + let mut buffer = vec![MaybeUninit::uninit(); buf.st_size as usize]; + let (data, _) = read_uninit(fd, &mut buffer)?; + + if buf.st_size > INLINE_CONTENT_MAX as i64 { + let id = if let Some(repo) = self.repo { + repo.ensure_object(data)? + } else { + FsVerityHasher::hash(data) + }; + LeafContent::ExternalFile(id, buf.st_size as u64) + } else { + LeafContent::InlineFile(Vec::from(data)) + } + } + FileType::Symlink => { + let target = readlinkat(fd, "", [])?; + LeafContent::Symlink(OsString::from_vec(target.into_bytes())) + } + FileType::CharacterDevice => LeafContent::CharacterDevice(buf.st_rdev), + FileType::BlockDevice => LeafContent::BlockDevice(buf.st_rdev), + FileType::Fifo => LeafContent::Fifo, + FileType::Socket => LeafContent::Socket, + }; + Ok(content) + } + + fn read_leaf(&mut self, dirfd: &OwnedFd, name: &OsStr, ifmt: FileType) -> Result<Rc<Leaf>> { + let oflags = match ifmt { + FileType::RegularFile => OFlags::RDONLY, + _ => OFlags::PATH, + }; + + let fd = openat( + dirfd, + name, + oflags | OFlags::NOFOLLOW | OFlags::CLOEXEC, + Mode::empty(), + )?; + + let (buf, stat) = self.stat(&fd, ifmt)?; + + if let Some(leafref) = self.inodes.get(&buf.st_ino) { + Ok(Rc::clone(leafref)) + } else { + let content = self.read_leaf_content(fd, buf)?; + let leaf = Rc::new(Leaf { stat, content }); + if buf.st_nlink > 1 { + self.inodes.insert(buf.st_ino, Rc::clone(&leaf)); + } + Ok(leaf) + } + } + + pub fn read_directory(&mut self, dirfd: impl AsFd, name: &OsStr) -> Result<Directory> { + let fd = openat( + dirfd, + 
name, + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC, + Mode::empty(), + )?; + + let (_, stat) = self.stat(&fd, FileType::Directory)?; + let mut directory = Directory { + stat, + entries: vec![], + }; + + for item in Dir::read_from(&fd)? { + let entry = item?; + let name = OsStr::from_bytes(entry.file_name().to_bytes()); + + if name == "." || name == ".." { + continue; + } + + let inode = self.read_inode(&fd, name, entry.file_type())?; + directory.insert(name, inode); + } + + Ok(directory) + } + + fn read_inode(&mut self, dirfd: &OwnedFd, name: &OsStr, ifmt: FileType) -> Result<Inode> { + if ifmt == FileType::Directory { + Ok(Inode::Directory(Box::new( + self.read_directory(dirfd, name)?, + ))) + } else { + Ok(Inode::Leaf(self.read_leaf(dirfd, name, ifmt)?)) + } + } +} + +pub fn read_from_path(path: &Path, repo: Option<&Repository>) -> Result<FileSystem> { + let mut reader = FilesystemReader { + repo, + inodes: HashMap::new(), + st_dev: u64::MAX, + root_mtime: 0, + }; + let mut fs = FileSystem { + root: reader.read_directory(CWD, path.as_os_str())?, + }; + fs.root.stat.st_mtim_sec = reader.root_mtime; + + // We can only relabel if we have the repo because we need to read the config and policy files + if let Some(repo) = repo { + selabel(&mut fs, repo)?; + } + + Ok(fs) +} + +pub fn create_image(path: &Path, repo: Option<&Repository>) -> Result<Sha256HashValue> { + let fs = read_from_path(path, repo)?; + let image = super::image::mkcomposefs(fs)?; + if let Some(repo) = repo { + Ok(repo.write_image(None, &image)?) + } else { + Ok(FsVerityHasher::hash(&image)) + } +} + +pub fn create_dumpfile(path: &Path) -> Result<()> { + let fs = read_from_path(path, None)?; + super::dumpfile::write_dumpfile(&mut std::io::stdout(), &fs) +}