Skip to content

Commit

Permalink
External loaders: shell-like $PATH behavior (#4833)
Browse files Browse the repository at this point in the history
Keep the first path per unique name, only complain about the others in
DEBUG.

This makes us behave like any shell and avoids any past, present and
future PATH issues that come with Python tools.
  • Loading branch information
teh-cmc authored and emilk committed Jan 17, 2024
1 parent a63bb6e commit be1d018
Showing 1 changed file with 35 additions and 40 deletions.
75 changes: 35 additions & 40 deletions crates/re_data_source/src/data_loader/loader_external.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,56 +27,51 @@ pub const EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE: i32 = 66;
pub static EXTERNAL_LOADER_PATHS: Lazy<Vec<std::path::PathBuf>> = Lazy::new(|| {
re_tracing::profile_function!();

use walkdir::WalkDir;

let dirpaths = std::env::var("PATH")
.ok()
.into_iter()
.flat_map(|paths| paths.split(':').map(ToOwned::to_owned).collect::<Vec<_>>())
.map(std::path::PathBuf::from);

let executables: ahash::HashSet<_> = dirpaths
.into_iter()
.flat_map(|dirpath| {
WalkDir::new(dirpath).into_iter().filter_map(|entry| {
let Ok(entry) = entry else {
return None;
};
let filepath = entry.path();
let is_rerun_loader = filepath.file_name().map_or(false, |filename| {
filename
.to_string_lossy()
.starts_with(EXTERNAL_DATA_LOADER_PREFIX)
});
(filepath.is_file() && is_rerun_loader).then(|| filepath.to_owned())
})
})
.collect();

// NOTE: We call all available loaders and do so in parallel: order is irrelevant here.
let executables = executables.into_iter().collect::<Vec<_>>();

// If the user has multiple data-loaders in their PATH with the same exact name, warn that
// something is very likely wrong.
// That can very easily happen with tools like `pip`/`pipx`.

let mut exe_names = HashMap::<String, Vec<std::path::PathBuf>>::default();
for path in &executables {
if let Some(filename) = path.file_name() {
let exe_paths = exe_names
.entry(filename.to_string_lossy().to_string())
.or_default();
exe_paths.push(path.clone());
}
}

for (name, paths) in exe_names {
if paths.len() > 1 {
re_log::warn!(name, ?paths, "Found duplicated data-loader in $PATH");
let mut executables = HashMap::<String, Vec<std::path::PathBuf>>::default();
for dirpath in dirpaths {
let Ok(dir) = std::fs::read_dir(dirpath) else {
continue;
};
let paths = dir.into_iter().filter_map(|entry| {
let Ok(entry) = entry else {
return None;
};
let filepath = entry.path();
let is_rerun_loader = filepath.file_name().map_or(false, |filename| {
filename
.to_string_lossy()
.starts_with(EXTERNAL_DATA_LOADER_PREFIX)
});
(filepath.is_file() && is_rerun_loader).then_some(filepath)
});

for path in paths {
if let Some(filename) = path.file_name() {
let exe_paths = executables
.entry(filename.to_string_lossy().to_string())
.or_default();
exe_paths.push(path.clone());
}
}
}

executables
.into_iter()
.filter_map(|(name, paths)| {
if paths.len() > 1 {
re_log::debug!(name, ?paths, "Found duplicated data-loader in $PATH");
}

// Only keep the first entry according to PATH order.
paths.into_iter().next()
})
.collect()
});

/// Iterator over all registered external [`crate::DataLoader`]s.
Expand Down

0 comments on commit be1d018

Please sign in to comment.