Skip to content

Commit

Permalink
Merge branch 'main' into add-test-process
Browse files Browse the repository at this point in the history
  • Loading branch information
sat0ken authored Oct 30, 2024
2 parents aef1f45 + 6717cbc commit 1926533
Show file tree
Hide file tree
Showing 25 changed files with 591 additions and 240 deletions.
414 changes: 205 additions & 209 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion crates/libcgroups/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rbpf = { version = "0.3.0", optional = true }
libbpf-sys = { version = "1.4.5", optional = true }
errno = { version = "0.3.9", optional = true }
libc = { version = "0.2.161", optional = true }
thiserror = "1.0.64"
thiserror = "1.0.65"
tracing = { version = "0.1.40", features = ["attributes"] }

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion crates/libcontainer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
rust-criu = "0.4.0"
regex = { version = "1.10.6", default-features = false, features = ["std", "unicode-perl"] }
thiserror = "1.0.64"
thiserror = "1.0.65"
tracing = { version = "0.1.40", features = ["attributes"] }
safe-path = "0.1.0"
nc = "0.9.5"
Expand Down
3 changes: 3 additions & 0 deletions crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ pub(super) struct ContainerBuilderImpl {
pub detached: bool,
/// Default executes the specified execution of a generic command
pub executor: Box<dyn Executor>,
/// If true, do not use pivot_root to jail the process inside the rootfs
pub no_pivot: bool,
}

impl ContainerBuilderImpl {
Expand Down Expand Up @@ -154,6 +156,7 @@ impl ContainerBuilderImpl {
cgroup_config,
detached: self.detached,
executor: self.executor.clone(),
no_pivot: self.no_pivot,
};

let (init_pid, need_to_clean_up_intel_rdt_dir) =
Expand Down
8 changes: 8 additions & 0 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct InitContainerBuilder {
bundle: PathBuf,
use_systemd: bool,
detached: bool,
no_pivot: bool,
}

impl InitContainerBuilder {
Expand All @@ -31,6 +32,7 @@ impl InitContainerBuilder {
bundle,
use_systemd: true,
detached: true,
no_pivot: false,
}
}

Expand All @@ -45,6 +47,11 @@ impl InitContainerBuilder {
self
}

pub fn with_no_pivot(mut self, no_pivot: bool) -> Self {
self.no_pivot = no_pivot;
self
}

/// Creates a new container
pub fn build(self) -> Result<Container, LibcontainerError> {
let spec = self.load_spec()?;
Expand Down Expand Up @@ -95,6 +102,7 @@ impl InitContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: self.no_pivot,
};

builder_impl.create()?;
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/tenant_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ impl TenantContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: false,
};

let pid = builder_impl.create()?;
Expand Down
2 changes: 2 additions & 0 deletions crates/libcontainer/src/process/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ pub struct ContainerArgs {
pub detached: bool,
/// Manage the functions that actually run on the container
pub executor: Box<dyn Executor>,
/// If true, do not use pivot_root to jail the process inside the rootfs
pub no_pivot: bool,
}
85 changes: 72 additions & 13 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use std::{env, fs, mem};

use nc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::Mode;
use nix::unistd::{self, setsid, Gid, Uid};
Expand Down Expand Up @@ -270,6 +270,76 @@ fn reopen_dev_null() -> Result<()> {
Ok(())
}

// Makes `target` inaccessible, either by unmounting it or by covering it.
//
// A lazy unmount (`MNT_DETACH`) is attempted first. If that fails with
// EINVAL, a read-only tmpfs with `size=0k` is mounted on top of `target`
// instead, hiding whatever was there. Any other unmount error, and any
// error from the fallback mount, is returned as
// `InitProcessError::SyscallOther`.
fn unmount_or_hide(syscall: &dyn Syscall, target: impl AsRef<Path>) -> Result<()> {
    let path = target.as_ref();

    // Happy path: the unmount worked, nothing left to hide.
    let detach_err = match syscall.umount2(path, MntFlags::MNT_DETACH) {
        Ok(_) => return Ok(()),
        Err(err) => err,
    };

    // Only EINVAL triggers the tmpfs fallback; everything else is fatal.
    if !matches!(detach_err, SyscallError::Nix(nix::errno::Errno::EINVAL)) {
        return Err(InitProcessError::SyscallOther(detach_err));
    }

    syscall
        .mount(
            None,
            path,
            Some("tmpfs"),
            MsFlags::MS_RDONLY,
            Some("size=0k"),
        )
        .map_err(InitProcessError::SyscallOther)
}

// Switches the process root to `rootfs` without pivot_root.
//
// Steps, in order: chdir into `rootfs`, unmount or hide /sys and /proc,
// move the `rootfs` mount onto "/" with MS_MOVE, chroot to ".", and finally
// chdir to "/". The first failing step aborts the sequence and its error is
// returned.
fn move_root(syscall: &dyn Syscall, rootfs: &Path) -> Result<()> {
    unistd::chdir(rootfs).map_err(InitProcessError::NixOther)?;

    // /sys and /proc are unmounted or hidden before the root changes so that
    // they are not accessible in the new rootfs. The logic is borrowed from crun:
    // https://github.com/containers/crun/blob/53cd1c1c697d7351d0cad23708d29bf4a7980a3a/src/libcrun/linux.c#L2780
    for hidden in ["/sys", "/proc"] {
        unmount_or_hide(syscall, hidden)?;
    }

    if let Err(err) = syscall.mount(Some(rootfs), Path::new("/"), None, MsFlags::MS_MOVE, None) {
        tracing::error!(?err, ?rootfs, "failed to mount ms_move");
        return Err(InitProcessError::SyscallOther(err));
    }

    // The current directory is still the (moved) rootfs, so "." is the new root.
    if let Err(err) = syscall.chroot(Path::new(".")) {
        tracing::error!(?err, ?rootfs, "failed to chroot");
        return Err(InitProcessError::SyscallOther(err));
    }

    unistd::chdir("/").map_err(InitProcessError::NixOther)
}

// Changes the root of the process to `rootfs`, choosing the mechanism from
// the namespace configuration and the `no_pivot` flag:
//
// * no mount namespace configured      -> plain chroot
// * mount namespace and `no_pivot`     -> move_root (MS_MOVE + chroot)
// * mount namespace and not `no_pivot` -> pivot_rootfs
//
// Syscall failures are logged together with the rootfs path and returned as
// `InitProcessError::SyscallOther`.
fn do_pivot_root(
    syscall: &dyn Syscall,
    namespaces: &Namespaces,
    no_pivot: bool,
    rootfs: impl AsRef<Path>,
) -> Result<()> {
    let rootfs_path = rootfs.as_ref();

    // Logs the failure and wraps it in the init-process error type.
    let log_and_wrap = |err: SyscallError, msg: &str| {
        tracing::error!(?err, ?rootfs_path, msg);
        InitProcessError::SyscallOther(err)
    };

    let has_mount_namespace = namespaces.get(LinuxNamespaceType::Mount)?.is_some();
    if !has_mount_namespace {
        return syscall
            .chroot(rootfs_path)
            .map_err(|err| log_and_wrap(err, "failed to chroot"));
    }

    if no_pivot {
        return move_root(syscall, rootfs_path);
    }

    syscall
        .pivot_rootfs(rootfs_path)
        .map_err(|err| log_and_wrap(err, "failed to pivot root"))
}

// Some variables are unused in the case where libseccomp feature is not enabled.
#[allow(unused_variables)]
pub fn container_init_process(
Expand Down Expand Up @@ -343,18 +413,7 @@ pub fn container_init_process(
// we use pivot_root, but if we are on the host mount namespace, we will
// use simple chroot. Scary things will happen if you try to pivot_root
// in the host mount namespace...
if namespaces.get(LinuxNamespaceType::Mount)?.is_some() {
// change the root of filesystem of the process to the rootfs
syscall.pivot_rootfs(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to pivot root");
InitProcessError::SyscallOther(err)
})?;
} else {
syscall.chroot(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to chroot");
InitProcessError::SyscallOther(err)
})?;
}
do_pivot_root(syscall.as_ref(), &namespaces, args.no_pivot, rootfs_path)?;

// As we have changed the root mount, from here on
// logs are no longer visible in journalctl
Expand Down
5 changes: 5 additions & 0 deletions crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,11 @@ impl Syscall for LinuxSyscall {
}?;
Ok(())
}

/// Unmounts `target` with the given `flags` by delegating to the free
/// `umount2` function in scope (presumably `nix::mount::umount2` — confirm
/// against the file's imports), converting its error into this module's
/// error type.
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
    umount2(target, flags).map_err(Into::into)
}
}

#[cfg(test)]
Expand Down
3 changes: 2 additions & 1 deletion crates/libcontainer/src/syscall/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::sync::Arc;

use caps::{CapSet, CapsHashSet};
use libc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
Expand Down Expand Up @@ -54,6 +54,7 @@ pub trait Syscall {
size: libc::size_t,
) -> Result<()>;
fn set_io_priority(&self, class: i64, priority: i64) -> Result<()>;
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()>;
}

#[derive(Clone, Copy)]
Expand Down
28 changes: 27 additions & 1 deletion crates/libcontainer/src/syscall/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;

use caps::{CapSet, CapsHashSet};
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
Expand Down Expand Up @@ -44,6 +44,12 @@ pub struct IoPriorityArgs {
pub priority: i64,
}

/// Arguments captured by the test syscall implementation for one `umount2` call.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct UMount2Args {
/// Path that was passed as the unmount target.
pub target: PathBuf,
/// Unmount flags that were passed alongside the target.
pub flags: MntFlags,
}

#[derive(Default)]
struct Mock {
values: Vec<Box<dyn Any>>,
Expand All @@ -64,6 +70,7 @@ pub enum ArgName {
Groups,
Capability,
IoPriority,
UMount2,
}

impl ArgName {
Expand Down Expand Up @@ -259,6 +266,16 @@ impl Syscall for TestHelperSyscall {
Box::new(IoPriorityArgs { class, priority }),
)
}

/// Test double for `umount2`: performs no unmount, it only hands the
/// arguments to the mock registry under `ArgName::UMount2` and returns
/// whatever the registry returns.
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
    let recorded = UMount2Args {
        target: target.to_path_buf(),
        flags,
    };
    self.mocks.act(ArgName::UMount2, Box::new(recorded))
}
}

impl TestHelperSyscall {
Expand Down Expand Up @@ -369,4 +386,13 @@ impl TestHelperSyscall {
.map(|x| x.downcast_ref::<IoPriorityArgs>().unwrap().clone())
.collect::<Vec<IoPriorityArgs>>()
}

/// Returns a copy of every `UMount2Args` stored under `ArgName::UMount2`,
/// in stored order.
///
/// Panics if a stored value is not a `UMount2Args`.
pub fn get_umount_args(&self) -> Vec<UMount2Args> {
    let mut recorded = Vec::new();
    for value in self.mocks.fetch(ArgName::UMount2).values.iter() {
        recorded.push(value.downcast_ref::<UMount2Args>().unwrap().clone());
    }
    recorded
}
}
8 changes: 4 additions & 4 deletions crates/youki/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ default-features = false
features = ["std", "suggestions", "derive", "cargo", "help", "usage", "error-context"]

[dependencies]
anyhow = "1.0.90"
anyhow = "1.0.91"
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
libcgroups = { path = "../libcgroups", default-features = false, version = "0.4.1" } # MARK: Version
libcontainer = { path = "../libcontainer", default-features = false, version = "0.4.1" } # MARK: Version
Expand All @@ -43,8 +43,8 @@ caps = "0.5.5"
wasmer = { version = "4.0.0", optional = true }
wasmer-wasix = { version = "0.9.0", optional = true }
wasmedge-sdk = { version = "0.14.0", optional = true }
wasmtime = { version = "25.0.2", optional = true }
wasi-common = { version = "25.0.2", optional = true }
wasmtime = { version = "26.0.0", optional = true }
wasi-common = { version = "26.0.0", optional = true }
tracing = { version = "0.1.40", features = ["attributes"] }
tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] }
tracing-journald = "0.3.0"
Expand All @@ -55,5 +55,5 @@ tempfile = "3"
scopeguard = "1.2.0"

[build-dependencies]
anyhow = "1.0.90"
anyhow = "1.0.91"
vergen-gitcl = { version = "1.0.1", features = ["build"] }
1 change: 1 addition & 0 deletions crates/youki/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(true)
.with_no_pivot(args.no_pivot)
.build()?;

Ok(())
Expand Down
7 changes: 6 additions & 1 deletion crates/youki/src/commands/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ pub fn print_os() {
println!("{:<18}{}", "Operating System", os);
} else if let Some(os) = try_read_os_from("/usr/lib/os-release") {
println!("{:<18}{}", "Operating System", os);
} else {
println!("{:<18}UNKNOWN", "Operating System");
}
}

Expand Down Expand Up @@ -204,6 +206,9 @@ pub fn print_namespaces() {
println!("{:<18}disabled", "Namespaces");
return;
}
} else {
println!("{:<18}UNKNOWN", "Namespaces");
// we don't return, as we can at least try to see whether anything is enabled
}

// mount namespace is always enabled if namespaces are enabled
Expand Down Expand Up @@ -266,7 +271,7 @@ fn print_feature_status(config: &str, feature: &str, display: FeatureDisplay) {

println!(" {:<16}{}", display.name, status);
} else {
println!(" {:<16}{}", display.name, display.disabled);
println!(" {:<16}UNKNOWN", display.name);
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/youki/src/commands/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<i32> {
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(args.detach)
.with_no_pivot(args.no_pivot)
.build()?;

container
Expand Down
21 changes: 21 additions & 0 deletions experiment/seccomp/src/instruction/arch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,24 @@ pub fn gen_validate(arc: &Arch) -> Vec<Instruction> {
Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
]
}

#[cfg(test)]
mod tests {
    use super::*;

    // The first three instructions produced for Arch::X86 must be: the load
    // of the arch field, the comparison against AUDIT_ARCH_X86_64, and the
    // kill-process return.
    #[test]
    fn test_gen_validate_x86() {
        let bpf_prog = gen_validate(&Arch::X86);
        let expected = [
            Instruction::stmt(
                BPF_LD | BPF_W | BPF_ABS,
                seccomp_data_arch_offset() as u32,
            ),
            Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, AUDIT_ARCH_X86_64),
            Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&bpf_prog[idx], want);
        }
    }

    // Same shape as the x86 test, but the comparison must use
    // AUDIT_ARCH_AARCH64.
    #[test]
    fn test_gen_validate_aarch64() {
        let bpf_prog = gen_validate(&Arch::AArch64);
        let expected = [
            Instruction::stmt(
                BPF_LD | BPF_W | BPF_ABS,
                seccomp_data_arch_offset() as u32,
            ),
            Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, AUDIT_ARCH_AARCH64),
            Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&bpf_prog[idx], want);
        }
    }
}
2 changes: 1 addition & 1 deletion experiment/seccomp/src/instruction/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ mod tests {
#[test]
fn test_seccomp_data_arg_size_offset() {
if cfg!(target_arch = "x86_64") {
assert_eq!(seccomp_data_arg_size_offset(), 8);
assert_eq!(seccomp_data_arg_size(), 8);
}
}

Expand Down
Loading

0 comments on commit 1926533

Please sign in to comment.