Skip to content

Commit

Permalink
PVF: Fix unshare "no such file or directory" error
Browse files Browse the repository at this point in the history
In #2304 we introduced this bug
where unshare will always fail, leading to a scary error message for validators.

Not sure how this wasn't caught in testing, neither for me nor the user who
reported #2304. 🙈

Closes #2320
  • Loading branch information
mrcnski committed Nov 21, 2023
1 parent 552be48 commit 2075df9
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 75 deletions.
5 changes: 2 additions & 3 deletions polkadot/node/core/pvf/src/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,8 +499,7 @@ mod tests {

#[tokio::test]
async fn remove_stale_cache_on_startup() {
let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap();
fs::create_dir_all(&cache_dir).unwrap();
let cache_dir = tempfile::Builder::new().prefix("test-cache-").tempdir().unwrap();

// invalid prefix
create_rand_artifact(&cache_dir, "");
Expand Down Expand Up @@ -529,7 +528,7 @@ mod tests {

assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7);

let artifacts = Artifacts::new_and_prune(&cache_dir).await;
let artifacts = Artifacts::new_and_prune(cache_dir.path()).await;

assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1);
assert_eq!(artifacts.len(), 1);
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ where
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
// worker cache, and the child can't access any other artifacts or gain any information from the
// original filename.
let link_path = worker_dir::execute_artifact(&worker_dir.path);
let link_path = worker_dir::execute_artifact(worker_dir.path());
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -292,7 +292,7 @@ where
}
}

let worker_dir_path = worker_dir.path.clone();
let worker_dir_path = worker_dir.path().to_owned();
let outcome = f(worker_dir).await;

// Try to clear the worker dir.
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/prepare/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ where
{
// Create the tmp file here so that the child doesn't need any file creation rights. This will
// be cleared at the end of this function.
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
let tmp_file = worker_dir::prepare_tmp_artifact(worker_dir.path());
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -333,7 +333,7 @@ where
}
};

let worker_dir_path = worker_dir.path.clone();
let worker_dir_path = worker_dir.path().to_owned();
let outcome = f(tmp_file, stream, worker_dir).await;

// Try to clear the worker dir.
Expand Down
17 changes: 7 additions & 10 deletions polkadot/node/core/pvf/src/security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,15 @@ async fn check_can_unshare_user_namespace_and_change_root(
) -> SecureModeResult {
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
let cache_dir_tempdir =
crate::worker_intf::tmppath_in("check-can-unshare", cache_path)
.await
.map_err(
|err|
SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
)
)?;
let cache_dir_tempdir = tempfile::Builder::new()
.prefix("check-can-unshare-")
.tempdir_in(cache_path)
.map_err(|err| SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
))?;
match tokio::process::Command::new(prepare_worker_program_path)
.arg("--check-can-unshare-user-namespace-and-change-root")
.arg(cache_dir_tempdir)
.arg(cache_dir_tempdir.path())
.output()
.await
{
Expand Down
105 changes: 47 additions & 58 deletions polkadot/node/core/pvf/src/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pub async fn spawn_with_program_path(

with_transient_socket_path(debug_id, |socket_path| {
let socket_path = socket_path.to_owned();
let worker_dir_path = worker_dir.path().to_owned();

async move {
let listener = UnixListener::bind(&socket_path).map_err(|err| {
Expand All @@ -91,7 +92,7 @@ pub async fn spawn_with_program_path(
&program_path,
&extra_args,
&socket_path,
&worker_dir.path,
&worker_dir_path,
security_status,
)
.map_err(|err| {
Expand All @@ -100,15 +101,14 @@ pub async fn spawn_with_program_path(
%debug_id,
?program_path,
?extra_args,
?worker_dir.path,
?worker_dir_path,
?socket_path,
"cannot spawn a worker: {:?}",
err,
);
SpawnErr::ProcessSpawn
})?;

let worker_dir_path = worker_dir.path.clone();
futures::select! {
accept_result = listener.accept().fuse() => {
let (stream, _) = accept_result.map_err(|err| {
Expand Down Expand Up @@ -150,7 +150,42 @@ where
F: FnOnce(&Path) -> Fut,
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
{
let socket_path = tmppath(&format!("pvf-host-{}", debug_id))
/// Returns a path under [`std::env::temp_dir`]. The path name will start with the given prefix.
///
/// There is only a certain number of retries. If exceeded this function will give up and return
/// an error.
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
use rand::distributions::Alphanumeric;

const DESCRIMINATOR_LEN: usize = 10;

let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
buf.extend(prefix.as_bytes());
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));

let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let mut path = dir.to_owned();
path.push(s);
path
}

const NUM_RETRIES: usize = 50;

let dir = std::env::temp_dir();
for _ in 0..NUM_RETRIES {
let tmp_path = make_tmppath(prefix, &dir);
if !tmp_path.exists() {
return Ok(tmp_path)
}
}

Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
}

let socket_path = tmppath(&format!("pvf-host-{}-", debug_id))
.await
.map_err(|_| SpawnErr::TmpPath)?;
let result = f(&socket_path).await;
Expand All @@ -162,46 +197,6 @@ where
result
}

/// Returns a path under the given `dir`. The path name will start with the given prefix.
///
/// There is only a certain number of retries. If exceeded this function will give up and return an
/// error.
pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
use rand::distributions::Alphanumeric;

const DESCRIMINATOR_LEN: usize = 10;

let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
buf.extend(prefix.as_bytes());
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));

let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let mut path = dir.to_owned();
path.push(s);
path
}

const NUM_RETRIES: usize = 50;

for _ in 0..NUM_RETRIES {
let tmp_path = make_tmppath(prefix, dir);
if !tmp_path.exists() {
return Ok(tmp_path)
}
}

Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
}

/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
let temp_dir = std::env::temp_dir();
tmppath_in(prefix, &temp_dir).await
}

/// A struct that represents an idle worker.
///
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
Expand All @@ -224,8 +219,6 @@ pub struct IdleWorker {
pub enum SpawnErr {
/// Cannot obtain a temporary path location.
TmpPath,
/// An FS error occurred.
Fs(String),
/// Cannot bind the socket to the given path.
Bind,
/// An error happened during accepting a connection to the socket.
Expand Down Expand Up @@ -419,26 +412,22 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>
/// ```
#[derive(Debug)]
pub struct WorkerDir {
pub path: PathBuf,
tempdir: tempfile::TempDir,
}

impl WorkerDir {
/// Creates a new, empty worker dir with a random name in the given cache dir.
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
let prefix = format!("worker-dir-{}-", debug_id);
let path = tmppath_in(&prefix, cache_dir).await.map_err(|_| SpawnErr::TmpPath)?;
tokio::fs::create_dir(&path)
.await
.map_err(|err| SpawnErr::Fs(err.to_string()))?;
Ok(Self { path })
let tempdir = tempfile::Builder::new()
.prefix(&prefix)
.tempdir_in(cache_dir)
.map_err(|_| SpawnErr::TmpPath)?;
Ok(Self { tempdir })
}
}

// Try to clean up the temporary worker dir at the end of the worker's lifetime. It should be wiped
// on startup, but we make a best effort not to leave it around.
impl Drop for WorkerDir {
fn drop(&mut self) {
let _ = std::fs::remove_dir_all(&self.path);
pub fn path(&self) -> &Path {
self.tempdir.path()
}
}

Expand Down

0 comments on commit 2075df9

Please sign in to comment.