diff --git a/cgroups/src/common.rs b/cgroups/src/common.rs index 58ffc825cd..b7d7a74b6f 100644 --- a/cgroups/src/common.rs +++ b/cgroups/src/common.rs @@ -357,12 +357,18 @@ pub(crate) fn default_devices() -> Vec { ] } -pub(crate) fn delete_with_retry>(path: P) -> Result<()> { +/// Attempts to delete the path the requested number of times. +pub(crate) fn delete_with_retry, L: Into>>(path: P, retries: u32, limit_backoff: L) -> Result<()> { let mut attempts = 0; let mut delay = Duration::from_millis(10); let path = path.as_ref(); + let limit = if let Some(backoff) = limit_backoff.into() { + backoff + } else { + Duration::MAX + }; - while attempts < 5 { + while attempts < retries { if fs::remove_dir(path).is_ok() { return Ok(()); } @@ -370,6 +376,9 @@ pub(crate) fn delete_with_retry>(path: P) -> Result<()> { std::thread::sleep(delay); attempts += attempts; delay *= attempts; + if delay > limit { + delay = limit; + } } bail!("could not delete {:?}", path) diff --git a/cgroups/src/v1/manager.rs b/cgroups/src/v1/manager.rs index e2aea497d0..39b9c5670c 100644 --- a/cgroups/src/v1/manager.rs +++ b/cgroups/src/v1/manager.rs @@ -1,5 +1,6 @@ use std::fs; use std::path::Path; +use std::time::Duration; use std::{collections::HashMap, path::PathBuf}; use anyhow::bail; @@ -161,7 +162,7 @@ impl CgroupManager for Manager { let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); } - common::delete_with_retry(cgroup_path.1)?; + common::delete_with_retry(cgroup_path.1, 4, Duration::from_millis(100))?; } } diff --git a/cgroups/src/v2/manager.rs b/cgroups/src/v2/manager.rs index d9e4aad327..54e4253fb0 100644 --- a/cgroups/src/v2/manager.rs +++ b/cgroups/src/v2/manager.rs @@ -1,8 +1,4 @@ -use std::{ - fs::{self}, - os::unix::fs::PermissionsExt, - path::{Path, PathBuf}, -}; +use std::{fs::{self}, os::unix::fs::PermissionsExt, path::{Path, PathBuf}, time::Duration}; use anyhow::Result; @@ -130,7 +126,7 @@ impl CgroupManager for Manager { let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); } - common::delete_with_retry(&self.full_path)?; + common::delete_with_retry(&self.full_path,4, Duration::from_millis(100))?; } Ok(()) diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index 989db60ae6..0561967987 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -6,7 +6,7 @@ use crate::{ syscall::Syscall, utils, }; -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use cgroups::{self, common::CgroupManager}; use nix::unistd::Pid; use oci_spec::runtime::{LinuxResources, Spec}; @@ -44,7 +44,15 @@ pub(super) struct ContainerBuilderImpl<'a> { impl<'a> ContainerBuilderImpl<'a> { pub(super) fn create(&mut self) -> Result<()> { - self.run_container() + if let Err(outer) = self.run_container().context("failed to create container") { + if let Err(inner) = self.cleanup_container() { + return Err(outer.context(inner)); + } + + return Err(outer); + } + + Ok(()) } fn run_container(&mut self) -> Result<()> { @@ -170,6 +178,33 @@ impl<'a> ContainerBuilderImpl<'a> { Ok(()) } + + fn cleanup_container(&self) -> Result<()> { + let linux = self.spec.linux.as_ref().context("no linux in spec")?; + let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, &self.container_id); + let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path, self.use_systemd)?; + + let mut errors = Vec::new(); + if let Err(e) = cmanager.remove().context("failed to remove cgroup") { + errors.push(e.to_string()); + } + + if let Some(container) = &self.container { + if container.root.exists() { + if let Err(e) = fs::remove_dir_all(&container.root) + .with_context(|| format!("could not delete {}", container.root.display())) + { + errors.push(e.to_string()); + } + } + } + + if !errors.is_empty() { + bail!("failed to cleanup container: {}", errors.join(";")); + } + + Ok(()) + } } fn setup_mapping(rootless: &Rootless, pid: Pid) -> Result<()> { @@ -201,10 +236,12 @@ fn apply_cgroups( }; cmanager .add_task(pid) - .context("Failed to add tasks to cgroup manager")?; + .with_context(|| format!("failed to add task {} to cgroup manager", pid))?; + cmanager .apply(&controller_opt) - .context("Failed to apply resource limits through cgroup")?; + .context("failed to apply resource limits to cgroup")?; + Ok(()) } diff --git a/src/utils.rs b/src/utils.rs index 9c8e5d000b..75ebdaf672 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -76,7 +76,7 @@ pub fn set_name(_name: &str) -> Result<()> { pub fn get_cgroup_path(cgroups_path: &Option, container_id: &str) -> PathBuf { match cgroups_path { Some(cpath) => cpath.clone(), - None => PathBuf::from(format!("/youki/{}", container_id)), + None => PathBuf::from(container_id), } } @@ -244,7 +244,7 @@ mod tests { let cid = "sample_container_id"; assert_eq!( get_cgroup_path(&None, cid), - PathBuf::from("/youki/sample_container_id") + PathBuf::from("sample_container_id") ); assert_eq!( get_cgroup_path(&Some(PathBuf::from("/youki")), cid),