diff --git a/Cargo.lock b/Cargo.lock index 8d338e067..ce95aadce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,6 +180,15 @@ dependencies = [ "libc", ] +[[package]] +name = "fastrand" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b705829d1e87f762c2df6da140b26af5839e1033aa84aa5f56bb688e4e1bdb" +dependencies = [ + "instant", +] + [[package]] name = "flate2" version = "1.0.20" @@ -977,6 +986,7 @@ dependencies = [ "chrono", "clap", "dbus", + "fastrand", "futures", "libc", "log", diff --git a/Cargo.toml b/Cargo.toml index ee1892a3d..3f70b42c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ oci_spec = { version = "0.1.0", path = "./oci_spec" } systemd = { version = "0.8", default-features = false } dbus = "0.9.2" tabwriter = "1" +fastrand = "1.4.1" [dev-dependencies] oci_spec = { version = "0.1.0", path = "./oci_spec", features = ["proptests"] } diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index c3a9d68e5..1bd64d8e7 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -1,5 +1,7 @@ use nix::sys::stat::SFlag; use std::collections::HashMap; +use std::convert::TryFrom; + use std::fs::File; use std::path::{Path, PathBuf}; @@ -475,6 +477,23 @@ pub enum LinuxNamespaceType { Network = 0x40000000, } +impl TryFrom<&str> for LinuxNamespaceType { + type Error = anyhow::Error; + + fn try_from(namespace: &str) -> Result { + match namespace { + "mnt" => Ok(LinuxNamespaceType::Mount), + "cgroup" => Ok(LinuxNamespaceType::Cgroup), + "uts" => Ok(LinuxNamespaceType::Uts), + "ipc" => Ok(LinuxNamespaceType::Ipc), + "user" => Ok(LinuxNamespaceType::User), + "pid" => Ok(LinuxNamespaceType::Pid), + "net" => Ok(LinuxNamespaceType::Network), + _ => bail!("unknown namespace {}, could not convert", namespace), + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone)] pub struct LinuxNamespace { #[serde(rename = "type")] diff --git a/src/container/builder.rs b/src/container/builder.rs index cfaf6dc98..70838572e 
100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -3,14 +3,16 @@ use std::path::PathBuf; use super::{init_builder::InitContainerBuilder, tenant_builder::TenantContainerBuilder}; pub struct ContainerBuilder { + /// Id of the container pub(super) container_id: String, - + /// Root directory for container state pub(super) root_path: PathBuf, - + /// Interface to operating system primitives pub(super) syscall: LinuxSyscall, - + /// File which will be used to communicate the pid of the + /// container process to the higher level runtime pub(super) pid_file: Option, - + /// Socket to communicate the file descriptor of the pty pub(super) console_socket: Option, } @@ -60,7 +62,7 @@ impl ContainerBuilder { /// /// ContainerBuilder::new("74f1a4cb3801".to_owned()) /// .as_tenant() - /// .with_container_command(vec!["sleep".to_owned(), "9001".to_owned()]) + /// .with_container_args(vec!["sleep".to_owned(), "9001".to_owned()]) /// .build(); /// ``` #[allow(clippy::wrong_self_convention)] diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index cf85bddc9..d93a14ed8 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use anyhow::Result; +use anyhow::{Context, Result}; use nix::{ sched, unistd::{Gid, Uid}, @@ -21,25 +21,39 @@ use crate::{ use super::{Container, ContainerStatus}; pub(super) struct ContainerBuilderImpl { + /// Flag indicating if an init or a tenant container should be created pub init: bool, + /// Interface to operating system primitives pub syscall: LinuxSyscall, + /// Flag indicating if systemd should be used for cgroup management pub use_systemd: bool, + /// Id of the container pub container_id: String, - pub root_path: PathBuf, + /// Directory where the state of the container will be stored pub container_dir: PathBuf, + /// OCI compliant runtime spec pub spec: Spec, + /// Root filesystem of the container pub rootfs: PathBuf, + /// File which will be 
used to communicate the pid of the + /// container process to the higher level runtime pub pid_file: Option, + /// Socket to communicate the file descriptor of the pty pub console_socket: Option, + /// Options for rootless containers pub rootless: Option, + /// Socket to communicate container start pub notify_socket: NotifyListener, + /// Container state pub container: Option, } impl ContainerBuilderImpl { pub(super) fn create(&mut self) -> Result<()> { if let Process::Parent(_) = self.run_container()? { - std::process::exit(0); + if self.init { + std::process::exit(0); + } } Ok(()) @@ -56,6 +70,7 @@ impl ContainerBuilderImpl { // first fork, which creates process, which will later create actual container process match fork::fork_first( + self.init, &self.pid_file, &self.rootless, linux, @@ -68,12 +83,18 @@ impl ContainerBuilderImpl { Process::Child(child) => { // set limits and namespaces to the process for rlimit in self.spec.process.rlimits.iter() { - self.syscall.set_rlimit(rlimit)? 
+ self.syscall + .set_rlimit(rlimit) + .context("failed to set rlimit")?; } - self.syscall.set_id(Uid::from_raw(0), Gid::from_raw(0))?; + self.syscall + .set_id(Uid::from_raw(0), Gid::from_raw(0)) + .context("failed to become root")?; let without = sched::CloneFlags::CLONE_NEWUSER; - namespaces.apply_unshare(without)?; + namespaces + .apply_unshare(without) + .context("could not unshare namespaces")?; // set up tty if specified if let Some(csocketfd) = &self.console_socket { @@ -89,12 +110,15 @@ impl ContainerBuilderImpl { // This is actually the child process after fork Process::Init(mut init) => { // prepare process - setup_init_process( - &self.spec, - &self.syscall, - self.rootfs.clone(), - &namespaces, - )?; + if self.init { + setup_init_process( + &self.spec, + &self.syscall, + self.rootfs.clone(), + &namespaces, + )?; + } + init.ready()?; self.notify_socket.wait_for_container_start()?; // actually run the command / program to be run in container diff --git a/src/container/container.rs b/src/container/container.rs index d144e9497..f24a50dff 100644 --- a/src/container/container.rs +++ b/src/container/container.rs @@ -94,6 +94,10 @@ impl Container { self.state.status.can_delete() } + pub fn can_exec(&self) -> bool { + self.state.status == ContainerStatus::Running + } + pub fn pid(&self) -> Option { self.state.pid.map(Pid::from_raw) } @@ -129,6 +133,19 @@ impl Container { None } + pub fn bundle(&self) -> String { + self.state.bundle.clone() + } + + pub fn set_systemd(mut self, should_use: bool) -> Self { + self.state.use_systemd = Some(should_use); + self + } + + pub fn systemd(&self) -> Option { + self.state.use_systemd + } + pub fn update_status(&self, status: ContainerStatus) -> Self { let created = match (status, self.state.created) { (ContainerStatus::Created, None) => Some(Utc::now()), @@ -152,8 +169,4 @@ impl Container { root: container_root, }) } - - pub fn bundle(&self) -> String { - self.state.bundle.clone() - } } diff --git 
a/src/container/init_builder.rs b/src/container/init_builder.rs index b6789f21e..a6d1089ba 100644 --- a/src/container/init_builder.rs +++ b/src/container/init_builder.rs @@ -7,7 +7,10 @@ use std::{ path::{Path, PathBuf}, }; -use crate::{notify_socket::NotifyListener, rootless, tty, utils}; +use crate::{ + notify_socket::{NotifyListener, NOTIFY_FILE}, + rootless, tty, utils, +}; use super::{ builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container, ContainerStatus, @@ -43,16 +46,22 @@ impl InitContainerBuilder { let spec = self.load_and_safeguard_spec(&container_dir)?; unistd::chdir(&*container_dir)?; - let container_state = self.create_container_state(&container_dir)?; + let container_state = self + .create_container_state(&container_dir)? + .set_systemd(self.use_systemd); - let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; + let notify_socket: NotifyListener = NotifyListener::new(NOTIFY_FILE)?; // convert path of root file system of the container to absolute path let rootfs = fs::canonicalize(&spec.root.path)?; // if socket file path is given in commandline options, // get file descriptors of console socket let csocketfd = if let Some(console_socket) = &self.base.console_socket { - Some(tty::setup_console_socket(&container_dir, console_socket)?) + Some(tty::setup_console_socket( + &container_dir, + console_socket, + "console-socket", + )?) } else { None }; @@ -63,7 +72,6 @@ impl InitContainerBuilder { init: true, syscall: self.base.syscall, container_id: self.base.container_id, - root_path: self.base.root_path, pid_file: self.base.pid_file, console_socket: csocketfd, use_systemd: self.use_systemd, diff --git a/src/container/mod.rs b/src/container/mod.rs index 59873ad22..3c71e82a0 100644 --- a/src/container/mod.rs +++ b/src/container/mod.rs @@ -1,5 +1,9 @@ //! Container management - +/// This crate is responsible for the creation of containers. It provides a builder that can +/// be used to configure and create containers. 
We distinguish between an init container for which +/// namespaces and cgroups will be created (usually) and a tenant container process that will move +/// into the existing namespaces and cgroups of the initial container process (e.g. used to implement +/// the exec command). pub mod builder; mod builder_impl; #[allow(clippy::module_inception)] diff --git a/src/container/state.rs b/src/container/state.rs index 49bfa4274..c77ad1fd5 100644 --- a/src/container/state.rs +++ b/src/container/state.rs @@ -4,14 +4,14 @@ use std::fmt::Display; use std::fs; use std::{fs::File, path::Path}; -use anyhow::Result; +use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; const STATE_FILE_PATH: &str = "state.json"; /// Indicates status of the container -#[derive(Serialize, Deserialize, Debug, Copy, Clone)] +#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub enum ContainerStatus { // The container is being created @@ -78,6 +78,8 @@ pub struct State { // User that created the container #[serde(skip_serializing_if = "Option::is_none")] pub creator: Option, + // Specifies if systemd should be used to manage cgroups + pub use_systemd: Option, } impl State { @@ -96,6 +98,7 @@ impl State { annotations: HashMap::default(), created: None, creator: None, + use_systemd: None, } } @@ -115,7 +118,9 @@ impl State { pub fn load(container_root: &Path) -> Result { let state_file_path = container_root.join(STATE_FILE_PATH); - let file = File::open(state_file_path)?; + let file = File::open(&state_file_path).with_context(|| { + format!("failed to open container state file {:?}", state_file_path) + })?; let state: Self = serde_json::from_reader(&file)?; Ok(state) } diff --git a/src/container/tenant_builder.rs b/src/container/tenant_builder.rs index 386c3a6f0..a3bcff062 100644 --- a/src/container/tenant_builder.rs +++ b/src/container/tenant_builder.rs @@ -1,14 +1,32 @@ -use anyhow::{bail, Result}; -use 
oci_spec::Spec; +use anyhow::{bail, Context, Result}; +use caps::Capability; +use nix::unistd; +use oci_spec::{ + LinuxCapabilities, LinuxCapabilityType, LinuxNamespace, LinuxNamespaceType, Process, Spec, +}; + use std::{ collections::HashMap, + convert::TryFrom, + ffi::{CString, OsString}, fs, + os::unix::prelude::OsStrExt, path::{Path, PathBuf}, + str::FromStr, +}; + +use crate::{ + notify_socket::{NotifyListener, NotifySocket}, + rootless::detect_rootless, + stdio::FileDescriptor, + tty, utils, }; -use crate::{notify_socket::NotifyListener, rootless::detect_rootless, tty}; +use super::{builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container}; -use super::{builder::ContainerBuilder, builder_impl::ContainerBuilderImpl}; +const NAMESPACE_TYPES: &[&str] = &["ipc", "uts", "net", "pid", "mnt", "cgroup"]; +const TENANT_NOTIFY: &str = "tenant-notify-"; +const TENANT_TTY: &str = "tenant-tty-"; /// Builder that can be used to configure the properties of a process /// that will join an existing container sandbox @@ -16,7 +34,10 @@ pub struct TenantContainerBuilder { base: ContainerBuilder, env: HashMap, cwd: Option, - command: Vec, + args: Vec, + no_new_privs: Option, + capabilities: Vec, + process: Option, } impl TenantContainerBuilder { @@ -28,7 +49,10 @@ impl TenantContainerBuilder { base: builder, env: HashMap::new(), cwd: None, - command: vec!["sh".to_owned()], + args: Vec::new(), + no_new_privs: None, + capabilities: Vec::new(), + process: None, } } @@ -45,47 +69,65 @@ impl TenantContainerBuilder { } /// Sets the command the container will be started with - pub fn with_container_command(mut self, command: Vec) -> Self { - self.command = command; + pub fn with_container_args(mut self, args: Vec) -> Self { + self.args = args; + self + } + + pub fn with_no_new_privs(mut self, no_new_privs: bool) -> Self { + self.no_new_privs = Some(no_new_privs); + self + } + + pub fn with_capabilities(mut self, capabilities: Vec) -> Self { + self.capabilities = 
capabilities; + self + } + + pub fn with_process>(mut self, path: P) -> Self { + self.process = Some(path.into()); self } /// Joins an existing container pub fn build(self) -> Result<()> { let container_dir = self.lookup_container_dir()?; - let spec = self.load_init_spec(&container_dir)?; + let container = self.load_container_state(container_dir.clone())?; + let mut spec = self.load_init_spec(&container_dir)?; + self.adapt_spec_for_tenant(&mut spec, &container)?; + log::debug!("{:#?}", spec); - let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; + unistd::chdir(&*container_dir)?; + let (notify_listener, notify_path) = Self::setup_notify_listener(&container_dir)?; // convert path of root file system of the container to absolute path let rootfs = fs::canonicalize(&spec.root.path)?; // if socket file path is given in commandline options, // get file descriptors of console socket - let csocketfd = if let Some(console_socket) = &self.base.console_socket { - Some(tty::setup_console_socket(&container_dir, console_socket)?) 
- } else { - None - }; + let csocketfd = self.setup_tty_socket(&container_dir)?; + let use_systemd = self.should_use_systemd(&container); let rootless = detect_rootless(&spec)?; let mut builder_impl = ContainerBuilderImpl { init: false, syscall: self.base.syscall, container_id: self.base.container_id, - root_path: self.base.root_path, pid_file: self.base.pid_file, console_socket: csocketfd, - use_systemd: false, + use_systemd, container_dir, spec, rootfs, rootless, - notify_socket, + notify_socket: notify_listener, container: None, }; builder_impl.create()?; + + let mut notify_socket = NotifySocket::new(notify_path); + notify_socket.notify_container_start()?; Ok(()) } @@ -101,7 +143,243 @@ impl TenantContainerBuilder { fn load_init_spec(&self, container_dir: &Path) -> Result { let spec_path = container_dir.join("config.json"); - let spec = oci_spec::Spec::load(spec_path)?; + let spec = oci_spec::Spec::load(spec_path).context("failed to load spec")?; Ok(spec) } + + fn load_container_state(&self, container_dir: PathBuf) -> Result { + let container = Container::load(container_dir)?.refresh_status()?; + if !container.can_exec() { + bail!( + "Cannot exec as container is in state {}", + container.status() + ); + } + + Ok(container) + } + + fn adapt_spec_for_tenant(&self, spec: &mut Spec, container: &Container) -> Result<()> { + if let Some(ref process) = self.process { + self.set_process(spec, process)?; + } else { + self.set_working_dir(spec)?; + self.set_args(spec)?; + self.set_environment(spec)?; + self.set_no_new_privileges(spec); + self.set_capabilities(spec)?; + } + + if container.pid().is_none() { + bail!("Could not retrieve container init pid"); + } + + let init_process = procfs::process::Process::new(container.pid().unwrap().as_raw())?; + self.set_namespaces(spec, init_process.namespaces()?)?; + + Ok(()) + } + + fn set_process(&self, spec: &mut Spec, process: &Path) -> Result<()> { + if !process.exists() { + bail!( + "Process.json file does not exist at 
specified path {}", + process.display() + ) + } + + let process = utils::open(process)?; + let process_spec: Process = serde_json::from_reader(process)?; + spec.process = process_spec; + Ok(()) + } + + fn set_working_dir(&self, spec: &mut Spec) -> Result<()> { + if let Some(ref cwd) = self.cwd { + if cwd.is_relative() { + bail!( + "Current working directory must be an absolute path, but is {}", + cwd.display() + ); + } + + spec.process.cwd = cwd.to_string_lossy().to_string(); + } + + Ok(()) + } + + fn set_args(&self, spec: &mut Spec) -> Result<()> { + if self.args.is_empty() { + bail!("Container command was not specified") + } + + spec.process.args = self.args.clone(); + Ok(()) + } + + fn set_environment(&self, spec: &mut Spec) -> Result<()> { + spec.process.env.append( + &mut self + .env + .iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect(), + ); + + Ok(()) + } + + fn set_no_new_privileges(&self, spec: &mut Spec) { + if let Some(no_new_privs) = self.no_new_privs { + spec.process.no_new_privileges = no_new_privs; + } + } + + fn set_capabilities(&self, spec: &mut Spec) -> Result<()> { + if !self.capabilities.is_empty() { + let mut caps: Vec = Vec::with_capacity(self.capabilities.len()); + for cap in &self.capabilities { + caps.push(LinuxCapabilityType { + cap: Capability::from_str(cap)?, + }); + } + + if let Some(ref mut spec_caps) = spec.process.capabilities { + spec_caps.ambient.append(&mut caps.clone()); + spec_caps.bounding.append(&mut caps.clone()); + spec_caps.effective.append(&mut caps.clone()); + spec_caps.inheritable.append(&mut caps.clone()); + spec_caps.permitted.append(&mut caps); + } else { + spec.process.capabilities = Some(LinuxCapabilities { + ambient: caps.clone(), + bounding: caps.clone(), + effective: caps.clone(), + inheritable: caps.clone(), + permitted: caps, + }) + } + } + + Ok(()) + } + + fn set_namespaces(&self, spec: &mut Spec, init_namespaces: Vec) -> Result<()> { + let mut tenant_namespaces = 
Vec::with_capacity(init_namespaces.len()); + + for ns_type in NAMESPACE_TYPES.iter().copied() { + if let Some(init_ns) = init_namespaces.iter().find(|n| n.ns_type.eq(ns_type)) { + let tenant_ns = LinuxNamespaceType::try_from(ns_type)?; + tenant_namespaces.push(LinuxNamespace { + typ: tenant_ns, + path: Some(init_ns.path.to_string_lossy().to_string()), + }) + } + } + + let mut linux = spec.linux.as_mut().unwrap(); + linux.namespaces = tenant_namespaces; + Ok(()) + } + + fn should_use_systemd(&self, container: &Container) -> bool { + if let Some(use_systemd) = container.systemd() { + return use_systemd; + } + + false + } + + fn setup_notify_listener(container_dir: &Path) -> Result<(NotifyListener, PathBuf)> { + let notify_name = Self::generate_name(&container_dir, TENANT_NOTIFY); + let socket_path = container_dir.join(&notify_name); + let notify_listener: NotifyListener = NotifyListener::new(&notify_name)?; + + Ok((notify_listener, socket_path)) + } + + fn setup_tty_socket(&self, container_dir: &Path) -> Result> { + let tty_name = Self::generate_name(&container_dir, TENANT_TTY); + let csocketfd = if let Some(console_socket) = &self.base.console_socket { + Some(tty::setup_console_socket( + container_dir, + console_socket, + &tty_name, + )?) + } else { + None + }; + + Ok(csocketfd) + } + + fn generate_name(dir: &Path, prefix: &str) -> String { + loop { + let rand = fastrand::i32(..); + let name = format!("{}{:x}.sock", prefix, rand); + if !dir.join(&name).exists() { + return name; + } + } + } +} + +// Can be removed once https://github.com/eminence/procfs/pull/135 is available +trait GetNamespace { + fn namespaces(&self) -> Result>; +} + +impl GetNamespace for procfs::process::Process { + /// Describes namespaces to which the process with the corresponding PID belongs. 
+ /// Doc reference: https://man7.org/linux/man-pages/man7/namespaces.7.html + fn namespaces(&self) -> Result> { + let proc_path = PathBuf::from(format!("/proc/{}", self.pid())); + let ns = proc_path.join("ns"); + let mut namespaces = Vec::new(); + for entry in fs::read_dir(ns)? { + let entry = entry?; + let path = entry.path(); + let ns_type = entry.file_name(); + let cstr = CString::new(path.as_os_str().as_bytes()).unwrap(); + + let mut stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::stat(cstr.as_ptr(), &mut stat) } != 0 { + bail!("Unable to stat {:?}", path); + } + + namespaces.push(Namespace { + ns_type, + path, + identifier: stat.st_ino, + device_id: stat.st_dev, + }) + } + + Ok(namespaces) + } } + +/// Information about a namespace +/// +/// See also the [Process::namespaces()] method +#[derive(Debug, Clone)] +pub struct Namespace { + /// Namespace type + pub ns_type: OsString, + /// Handle to the namespace + pub path: PathBuf, + /// Namespace identifier (inode number) + pub identifier: u64, + /// Device id of the namespace + pub device_id: u64, +} + +impl PartialEq for Namespace { + fn eq(&self, other: &Self) -> bool { + // see https://lore.kernel.org/lkml/87poky5ca9.fsf@xmission.com/ + self.identifier == other.identifier && self.device_id == other.device_id + } +} + +impl Eq for Namespace {} diff --git a/src/exec.rs b/src/exec.rs new file mode 100644 index 000000000..1efcbb4ab --- /dev/null +++ b/src/exec.rs @@ -0,0 +1,86 @@ +use anyhow::Result; +use clap::Clap; +use std::{error::Error, path::PathBuf}; + +use crate::container::builder::ContainerBuilder; + +#[derive(Clap, Debug)] +pub struct Exec { + /// Unix socket (file) path , which will receive file descriptor of the writing end of the pseudoterminal + #[clap(long)] + pub console_socket: Option, + #[clap(short, long)] + pub tty: bool, + #[clap(long)] + /// Current working directory of the container + pub cwd: Option, + #[clap(long)] + /// The file to which the pid of the container process 
should be written to + pub pid_file: Option, + /// Environment variables that should be set in the container + #[clap(short, long, parse(try_from_str = parse_key_val), number_of_values = 1)] + pub env: Vec<(String, String)>, + /// Prevent the process from gaining additional privileges + #[clap(long)] + pub no_new_privs: bool, + /// Path to process.json + #[clap(short, long)] + pub process: Option, + /// Detach from the container process + #[clap(short, long)] + pub detach: bool, + /// Identifier of the container + pub container_id: String, + /// Command that should be executed in the container + #[clap(required = false)] + pub command: Vec, +} + +impl Exec { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let mut builder = + ContainerBuilder::new(self.container_id.clone()).with_root_path(root_path); + + if let Some(console_socket) = &self.console_socket { + builder = builder.with_console_socket(console_socket); + } + + if let Some(pid_file) = &self.pid_file { + builder = builder.with_pid_file(pid_file); + } + + let mut builder = builder.as_tenant(); + + if let Some(cwd) = &self.cwd { + builder = builder.with_cwd(cwd); + } + + if !self.env.is_empty() { + let env = self.env.clone().into_iter().collect(); + builder = builder.with_env(env) + } + + builder = builder.with_no_new_privs(self.no_new_privs); + + if let Some(process) = &self.process { + builder = builder.with_process(process); + } + + builder.with_container_args(self.command.clone()).build()?; + + Ok(()) + } +} + +fn parse_key_val(s: &str) -> Result<(T, U), Box> +where + T: std::str::FromStr, + T::Err: Error + Send + Sync + 'static, + U: std::str::FromStr, + U::Err: Error + Send + Sync + 'static, +{ + let pos = s + .find('=') + .ok_or_else(|| format!("invalid KEY=value: no `=` found in `{}`", s))?; + Ok((s[..pos].parse()?, s[pos + 1..].parse()?)) +} diff --git a/src/lib.rs b/src/lib.rs index da3c23572..439f53d4d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ pub mod container; pub mod 
create; pub mod dbus; pub mod delete; +pub mod exec; pub mod info; pub mod kill; pub mod list; diff --git a/src/main.rs b/src/main.rs index c3eebf2dd..b9aea21a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ use clap::Clap; use youki::create; use youki::delete; +use youki::exec; use youki::info; use youki::kill; use youki::list; @@ -47,6 +48,8 @@ enum SubCommand { #[clap(version = "0.0.1", author = "utam0k ")] Start(start::Start), #[clap(version = "0.0.1", author = "utam0k ")] + Exec(exec::Exec), + #[clap(version = "0.0.1", author = "utam0k ")] Kill(kill::Kill), #[clap(version = "0.0.1", author = "utam0k ")] Delete(delete::Delete), @@ -79,6 +82,7 @@ fn main() -> Result<()> { match opts.subcmd { SubCommand::Create(create) => create.exec(root_path, systemd_cgroup), SubCommand::Start(start) => start.exec(root_path), + SubCommand::Exec(exec) => exec.exec(root_path), SubCommand::Kill(kill) => kill.exec(root_path), SubCommand::Delete(delete) => delete.exec(root_path, systemd_cgroup), SubCommand::State(state) => state.exec(root_path), diff --git a/src/namespaces.rs b/src/namespaces.rs index 1f2361652..512e28e71 100644 --- a/src/namespaces.rs +++ b/src/namespaces.rs @@ -60,6 +60,7 @@ impl Namespaces { (space, fd) }) .collect(); + for &(space, fd) in &to_enter { self.command.set_ns(fd, space)?; unistd::close(fd)?; diff --git a/src/notify_socket.rs b/src/notify_socket.rs index 45f9eff39..63738b61b 100644 --- a/src/notify_socket.rs +++ b/src/notify_socket.rs @@ -1,10 +1,11 @@ +use std::env; use std::io::prelude::*; use std::os::unix::io::AsRawFd; use std::os::unix::net::{UnixListener, UnixStream}; -use std::path::Path; +use std::path::PathBuf; use anyhow::Result; -use nix::unistd::close; +use nix::unistd::{self, close}; pub const NOTIFY_FILE: &str = "notify.sock"; @@ -13,9 +14,8 @@ pub struct NotifyListener { } impl NotifyListener { - pub fn new(root: &Path) -> Result { - let _notify_file_path = root.join(NOTIFY_FILE); - let stream = 
UnixListener::bind("notify.sock")?; + pub fn new(socket_name: &str) -> Result { + let stream = UnixListener::bind(socket_name)?; Ok(Self { socket: stream }) } @@ -24,7 +24,7 @@ impl NotifyListener { Ok((mut socket, _addr)) => { let mut response = String::new(); socket.read_to_string(&mut response)?; - log::debug!("receive :{}", response); + log::debug!("received: {}", response); } Err(e) => println!("accept function failed: {:?}", e), } @@ -37,18 +37,25 @@ impl NotifyListener { } } -pub struct NotifySocket {} +pub struct NotifySocket { + path: PathBuf, +} impl NotifySocket { - pub fn new(_root: &Path) -> Result { - Ok(Self {}) + pub fn new>(socket_path: P) -> Self { + Self { + path: socket_path.into(), + } } pub fn notify_container_start(&mut self) -> Result<()> { - log::debug!("connection start"); - let mut stream = UnixStream::connect("notify.sock")?; + log::debug!("notify container start"); + let cwd = env::current_dir()?; + unistd::chdir(&*self.path.parent().unwrap())?; + let mut stream = UnixStream::connect(&self.path.file_name().unwrap())?; stream.write_all(b"start container")?; - log::debug!("write finish"); + log::debug!("notify finished"); + unistd::chdir(&*cwd)?; Ok(()) } diff --git a/src/process/fork.rs b/src/process/fork.rs index 915f802cd..19d668984 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -21,6 +21,7 @@ use crate::rootless::Rootless; /// Function to perform the first fork for in order to run the container process pub fn fork_first>( + init: bool, pid_file: &Option

, rootless: &Option, linux: &oci_spec::Linux, @@ -68,8 +69,9 @@ pub fn fork_first>( // wait for child to fork init process and report back its pid let init_pid = parent.wait_for_child_ready(child)?; log::debug!("init pid is {:?}", init_pid); - if rootless.is_none() && linux.resources.is_some() { - cmanager.add_task(Pid::from_raw(init_pid))?; + + cmanager.add_task(Pid::from_raw(init_pid))?; + if rootless.is_none() && linux.resources.is_some() && init { cmanager.apply(&linux.resources.as_ref().unwrap())?; } @@ -86,6 +88,7 @@ pub fn fork_first>( if let Some(pid_file) = pid_file { fs::write(&pid_file, format!("{}", child))?; } + Ok(Process::Parent(parent)) } } diff --git a/src/start.rs b/src/start.rs index d37f95cbc..8788221e4 100644 --- a/src/start.rs +++ b/src/start.rs @@ -7,7 +7,7 @@ use clap::Clap; use nix::unistd; use crate::container::{Container, ContainerStatus}; -use crate::notify_socket::NotifySocket; +use crate::notify_socket::{NotifySocket, NOTIFY_FILE}; #[derive(Clap, Debug)] pub struct Start { @@ -33,7 +33,7 @@ impl Start { unistd::chdir(container.root.as_os_str())?; - let mut notify_socket = NotifySocket::new(&container.root)?; + let mut notify_socket = NotifySocket::new(&container.root.join(NOTIFY_FILE)); notify_socket.notify_container_start()?; container.update_status(ContainerStatus::Running).save()?; diff --git a/src/tty.rs b/src/tty.rs index a375e6ca1..023853a3a 100644 --- a/src/tty.rs +++ b/src/tty.rs @@ -4,6 +4,7 @@ use std::os::unix::fs::symlink; use std::os::unix::io::AsRawFd; use std::path::Path; +use anyhow::Context; use anyhow::{bail, Result}; use nix::errno::Errno; use nix::sys::socket; @@ -18,9 +19,10 @@ use crate::stdio::FileDescriptor; pub fn setup_console_socket( container_dir: &Path, console_socket_path: &Path, + socket_name: &str, ) -> Result { - let csocket = "console-socket"; - symlink(console_socket_path, container_dir.join(csocket))?; + let linked = container_dir.join(socket_name); + symlink(console_socket_path, &linked)?; let mut 
csocketfd = socket::socket( socket::AddressFamily::Unix, @@ -30,11 +32,11 @@ pub fn setup_console_socket( )?; csocketfd = match socket::connect( csocketfd, - &socket::SockAddr::Unix(socket::UnixAddr::new(&*csocket)?), + &socket::SockAddr::Unix(socket::UnixAddr::new(&*socket_name)?), ) { Err(e) => { if e != ::nix::Error::Sys(Errno::ENOENT) { - bail!("failed to open {}", csocket); + bail!("failed to open {}", socket_name); } -1 } @@ -46,7 +48,8 @@ pub fn setup_console_socket( pub fn setup_console(console_fd: &FileDescriptor) -> Result<()> { // You can also access pty master, but it is better to use the API. // ref. https://github.com/containerd/containerd/blob/261c107ffc4ff681bc73988f64e3f60c32233b37/vendor/github.com/containerd/go-runc/console.go#L139-L154 - let openpty_result = nix::pty::openpty(None, None)?; + let openpty_result = + nix::pty::openpty(None, None).context("could not create pseudo terminal")?; let pty_name: &[u8] = b"/dev/ptmx"; let iov = [uio::IoVec::from_slice(pty_name)]; let fds = [openpty_result.master]; @@ -57,7 +60,8 @@ pub fn setup_console(console_fd: &FileDescriptor) -> Result<()> { &[cmsg], socket::MsgFlags::empty(), None, - )?; + ) + .context("failed to send pty master")?; setsid()?; if unsafe { libc::ioctl(openpty_result.slave, libc::TIOCSCTTY) } < 0 { @@ -65,7 +69,7 @@ pub fn setup_console(console_fd: &FileDescriptor) -> Result<()> { }; let slave = FileDescriptor::from(openpty_result.slave); stdio::connect_stdio(&slave, &slave, &slave).expect("could not dup tty to stderr"); - close(console_fd.as_raw_fd())?; + close(console_fd.as_raw_fd()).context("could not close console socket")?; Ok(()) } @@ -82,6 +86,8 @@ mod tests { use crate::utils::{create_temp_dir, TempDir}; + const CONSOLE_SOCKET: &str = "console-socket"; + fn setup(testname: &str) -> Result<(TempDir, PathBuf, PathBuf)> { let testdir = create_temp_dir(testname)?; let rundir_path = Path::join(&testdir, "run"); @@ -100,7 +106,7 @@ mod tests { let (testdir, rundir_path, socket_path) 
= init.unwrap(); let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); assert!(lis.is_ok()); - let fd = setup_console_socket(&&rundir_path, &socket_path); + let fd = setup_console_socket(&&rundir_path, &socket_path, CONSOLE_SOCKET); assert!(fd.is_ok()); assert_ne!(fd.unwrap().as_raw_fd(), -1); } @@ -111,7 +117,7 @@ mod tests { let init = setup("test_setup_console_socket_empty"); assert!(init.is_ok()); let (_testdir, rundir_path, socket_path) = init.unwrap(); - let fd = setup_console_socket(&rundir_path, &socket_path); + let fd = setup_console_socket(&rundir_path, &socket_path, CONSOLE_SOCKET); assert!(fd.is_ok()); assert_eq!(fd.unwrap().as_raw_fd(), -1); } @@ -124,7 +130,7 @@ mod tests { let (testdir, rundir_path, socket_path) = init.unwrap(); let _socket = File::create(Path::join(&testdir, "console-socket")); assert!(_socket.is_ok()); - let fd = setup_console_socket(&rundir_path, &socket_path); + let fd = setup_console_socket(&rundir_path, &socket_path, CONSOLE_SOCKET); assert!(fd.is_err()); } @@ -136,7 +142,7 @@ mod tests { let (testdir, rundir_path, socket_path) = init.unwrap(); let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); assert!(lis.is_ok()); - let fd = setup_console_socket(&&rundir_path, &socket_path); + let fd = setup_console_socket(&&rundir_path, &socket_path, CONSOLE_SOCKET); let status = setup_console(&fd.unwrap()); assert!(status.is_ok()); } diff --git a/src/utils.rs b/src/utils.rs index d5f0729fb..916e50d9a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,7 +2,7 @@ use std::env; use std::ffi::CString; -use std::fs; +use std::fs::{self, File}; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -101,6 +101,11 @@ pub fn create_dir_all>(path: P) -> Result<()> { fs::create_dir_all(path).with_context(|| format!("failed to create directory {:?}", path)) } +pub fn open>(path: P) -> Result { + let path = path.as_ref(); + File::open(path).with_context(|| format!("failed to open {:?}", path)) 
+} + pub struct TempDir { path: Option, }