Merge pull request #162 from lars-t-hansen/w-152-final-cleanup

Fix #152 - Bump version, documentation, tidy up
NordicHPC · May 16, 2024 · 4a41c81 · 4a41c81
2 parents fc07391 + faffa0a
commit 4a41c81
Show file tree

Hide file tree

Showing 29 changed files with 487 additions and 128 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -14,7 +14,20 @@ jobs:
     steps:
     - name: Checkout
       uses: actions/checkout@v2
+
     - name: Build
       run: cargo build --verbose
+
     - name: Test
       run: cargo test --verbose
+
+      # one of the shell tests below needs jq
+    - name: Install jq
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y jq
+
+    - name: Shell tests
+      run: |
+        cd tests
+        ./run_tests.sh
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,4 +7,4 @@ edition = "2021"
 
 [dependencies]
 libc = "0.2"
-subprocess = "0.2"
+subprocess = { version = "= 0.2.9" }
diff --git a/README.md b/README.md
@@ -434,6 +434,22 @@ of processes.
 
 Optionally, `sonar` will use a lockfile to avoid a pile-up of processes.
 
+## Dependencies and updates
+
+Sonar runs everywhere and all the time, and even though it currently runs without privileges it
+strives to have as few dependencies as possible, so as not to become a target through a supply chain
+attack.  There are some rules:
+
+- It's OK to depend on libc and to incorporate new versions of libc
+- It's better to depend on something from the rust-lang organization than on something else
+- Every dependency needs to be justified
+- Every dependency must have a compatible license
+- Every dependency needs to be vetted as to active development, apparent quality, test cases
+- Every dependency update - even for security issues - is to be considered a code change that needs review
+- Remember that indirect dependencies are dependencies for us, too, and need to be treated the same way
+- If in doubt: copy the parts we need, vet them thoroughly, and maintain them separately
+
+There is a useful discussion of these matters [here](https://research.swtch.com/deps).
 
 ## How we run sonar on a cluster
 

diff --git a/src/amd.rs b/src/amd.rs
@@ -76,7 +76,11 @@ pub fn get_amd_information(user_by_pid: &UserTable) -> Result<Vec<gpu::Process>,
 
     match command::safe_command(AMD_CONCISE_COMMAND, AMD_CONCISE_ARGS, TIMEOUT_SECONDS) {
         Ok(concise_raw_text) => {
-            match command::safe_command(AMD_SHOWPIDGPUS_COMMAND, AMD_SHOWPIDGPUS_ARGS, TIMEOUT_SECONDS) {
+            match command::safe_command(
+                AMD_SHOWPIDGPUS_COMMAND,
+                AMD_SHOWPIDGPUS_ARGS,
+                TIMEOUT_SECONDS,
+            ) {
                 Ok(showpidgpus_raw_text) => Ok(extract_amd_information(
                     &concise_raw_text,
                     &showpidgpus_raw_text,

diff --git a/src/command.rs b/src/command.rs
@@ -22,7 +22,11 @@ pub enum CmdError {
 // especially at https://github.com/rust-lang/rust/issues/45572#issuecomment-860134955.  See also
 // https://doc.rust-lang.org/std/process/index.html (second code blob under "Handling I/O").
 
-pub fn safe_command(command: &str, args: &[&str], timeout_seconds: u64) -> Result<String, CmdError> {
+pub fn safe_command(
+    command: &str,
+    args: &[&str],
+    timeout_seconds: u64,
+) -> Result<String, CmdError> {
     let mut p = match Exec::cmd(command)
         .args(args)
         .stdout(Redirection::Pipe)
@@ -168,7 +172,9 @@ fn test_safe_command() {
         Err(_) => assert!(false),
     }
     // This really needs to be the output
-    assert!(safe_command("grep", &["^name =", "Cargo.toml"], 2) == Ok("name = \"sonar\"\n".to_string()));
+    assert!(
+        safe_command("grep", &["^name =", "Cargo.toml"], 2) == Ok("name = \"sonar\"\n".to_string())
+    );
     // Not found
     match safe_command("no-such-command-we-hope", &[], 2) {
         Err(CmdError::CouldNotStart(_)) => {}

diff --git a/src/hostname.rs b/src/hostname.rs
@@ -28,24 +28,20 @@ SOFTWARE.
  */
 
 use std::ffi::OsString;
-use std::os::unix::ffi::OsStringExt;
 use std::io;
-use libc;
+use std::os::unix::ffi::OsStringExt;
 
 pub fn get() -> io::Result<OsString> {
     // According to the POSIX specification,
     // host names are limited to `HOST_NAME_MAX` bytes
     //
     // https://pubs.opengroup.org/onlinepubs/9699919799/functions/gethostname.html
-    let size =
-        unsafe { libc::sysconf(libc::_SC_HOST_NAME_MAX) as libc::size_t };
+    let size = unsafe { libc::sysconf(libc::_SC_HOST_NAME_MAX) as libc::size_t };
 
     // Stack buffer OK: HOST_NAME_MAX is typically very small (64 on Linux).
     let mut buffer = vec![0u8; size];
 
-    let result = unsafe {
-        libc::gethostname(buffer.as_mut_ptr() as *mut libc::c_char, size)
-    };
+    let result = unsafe { libc::gethostname(buffer.as_mut_ptr() as *mut libc::c_char, size) };
 
     if result != 0 {
         return Err(io::Error::last_os_error());
@@ -61,9 +57,8 @@ fn wrap_buffer(mut bytes: Vec<u8>) -> OsString {
     let end = bytes
         .iter()
         .position(|&byte| byte == 0x00)
-        .unwrap_or_else(|| bytes.len());
+        .unwrap_or(bytes.len());
     bytes.resize(end, 0x00);
 
     OsString::from_vec(bytes)
 }
-
diff --git a/src/interrupt.rs b/src/interrupt.rs
@@ -1,3 +1,6 @@
+#[cfg(debug_assertions)]
+use crate::log;
+
 use std::sync::atomic::{AtomicBool, Ordering};
 
 // Signal handling logic.
@@ -19,15 +22,15 @@ extern "C" fn sonar_signal_handler(_: libc::c_int) {
 
 pub fn handle_interruptions() {
     unsafe {
-        let nomask : libc::sigset_t = std::mem::zeroed();
-        let mut action = libc::sigaction {
+        let nomask: libc::sigset_t = std::mem::zeroed();
+        let action = libc::sigaction {
             sa_sigaction: sonar_signal_handler as usize,
             sa_mask: nomask,
             sa_flags: 0,
             sa_restorer: None,
         };
-        libc::sigaction(libc::SIGTERM, &mut action, std::ptr::null_mut());
-        libc::sigaction(libc::SIGHUP, &mut action, std::ptr::null_mut());
+        libc::sigaction(libc::SIGTERM, &action, std::ptr::null_mut());
+        libc::sigaction(libc::SIGHUP, &action, std::ptr::null_mut());
     }
 }
 
@@ -38,7 +41,8 @@ pub fn is_interrupted() -> bool {
     }
     let flag = INTERRUPTED.load(Ordering::Relaxed);
     if flag {
-        println!("Interrupt flag was set!")
+        // Test cases depend on this exact output.
+        log::info("Interrupt flag was set!")
     }
     flag
 }

diff --git a/src/jobs.rs b/src/jobs.rs
@@ -9,5 +9,6 @@ pub trait JobManager {
     //
     // There's an assumption here that the process map is always the same for all lookups
     // performed on a particular instance of JobManager.
-    fn job_id_from_pid(&mut self, pid: usize, processes: &HashMap<usize, procfs::Process>) -> usize;
+    fn job_id_from_pid(&mut self, pid: usize, processes: &HashMap<usize, procfs::Process>)
+        -> usize;
 }
diff --git a/src/main.rs b/src/main.rs
@@ -17,7 +17,7 @@ mod users;
 mod util;
 
 const TIMEOUT_SECONDS: u64 = 5; // For subprocesses
-const USAGE_ERROR: i32 = 2;     // clap, Python, Go
+const USAGE_ERROR: i32 = 2; // clap, Python, Go
 
 enum Commands {
     /// Take a snapshot of the currently running processes
@@ -115,10 +115,12 @@ fn main() {
 //  - all error reporting is via a generic "usage" message, without specificity as to what was wrong
 
 fn command_line() -> Commands {
-    let mut args = std::env::args();
-    let _executable = args.next();
-    if let Some(command) = args.next() {
-        match command.as_str() {
+    let args = std::env::args().collect::<Vec<String>>();
+    let mut next = 1;
+    if next < args.len() {
+        let command = args[next].as_ref();
+        next += 1;
+        match command {
             "ps" => {
                 let mut batchless = false;
                 let mut rollup = false;
@@ -129,42 +131,43 @@ fn command_line() -> Commands {
                 let mut exclude_users = None;
                 let mut exclude_commands = None;
                 let mut lockdir = None;
-                loop {
-                    if let Some(arg) = args.next() {
-                        match arg.as_str() {
-                            "--batchless" => {
-                                batchless = true;
-                            }
-                            "--rollup" => {
-                                rollup = true;
-                            }
-                            "--exclude-system-jobs" => {
-                                exclude_system_jobs = true;
-                            }
-                            "--exclude-users" => {
-                                (args, exclude_users) = string_value(args);
-                            }
-                            "--exclude-commands" => {
-                                (args, exclude_commands) = string_value(args);
-                            }
-                            "--lockdir" => {
-                                (args, lockdir) = string_value(args);
-                            }
-                            "--min-cpu-percent" => {
-                                (args, min_cpu_percent) = parsed_value::<f64>(args);
-                            }
-                            "--min-mem-percent" => {
-                                (args, min_mem_percent) = parsed_value::<f64>(args);
-                            }
-                            "--min-cpu-time" => {
-                                (args, min_cpu_time) = parsed_value::<usize>(args);
-                            }
-                            _ => {
-                                usage(true);
-                            }
-                        }
+                while next < args.len() {
+                    let arg = args[next].as_ref();
+                    next += 1;
+                    if let Some(new_next) = bool_arg(arg, &args, next, "--batchless") {
+                        (next, batchless) = (new_next, true);
+                    } else if let Some(new_next) = bool_arg(arg, &args, next, "--rollup") {
+                        (next, rollup) = (new_next, true);
+                    } else if let Some(new_next) =
+                        bool_arg(arg, &args, next, "--exclude-system-jobs")
+                    {
+                        (next, exclude_system_jobs) = (new_next, true);
+                    } else if let Some((new_next, value)) =
+                        string_arg(arg, &args, next, "--exclude-users")
+                    {
+                        (next, exclude_users) = (new_next, Some(value));
+                    } else if let Some((new_next, value)) =
+                        string_arg(arg, &args, next, "--exclude-commands")
+                    {
+                        (next, exclude_commands) = (new_next, Some(value));
+                    } else if let Some((new_next, value)) =
+                        string_arg(arg, &args, next, "--lockdir")
+                    {
+                        (next, lockdir) = (new_next, Some(value));
+                    } else if let Some((new_next, value)) =
+                        numeric_arg::<f64>(arg, &args, next, "--min-cpu-percent")
+                    {
+                        (next, min_cpu_percent) = (new_next, Some(value));
+                    } else if let Some((new_next, value)) =
+                        numeric_arg::<f64>(arg, &args, next, "--min-mem-percent")
+                    {
+                        (next, min_mem_percent) = (new_next, Some(value));
+                    } else if let Some((new_next, value)) =
+                        numeric_arg::<usize>(arg, &args, next, "--min-cpu-time")
+                    {
+                        (next, min_cpu_time) = (new_next, Some(value));
                     } else {
-                        break;
+                        usage(true);
                     }
                 }
 
@@ -178,7 +181,8 @@ fn command_line() -> Commands {
                     eprintln!("--rollup and --batchless are incompatible");
                     std::process::exit(USAGE_ERROR);
                 }
-                return Commands::PS {
+
+                Commands::PS {
                     batchless,
                     rollup,
                     min_cpu_percent,
@@ -188,9 +192,9 @@ fn command_line() -> Commands {
                     exclude_users,
                     exclude_commands,
                     lockdir,
-                };
+                }
             }
-            "sysinfo" => return Commands::Sysinfo {},
+            "sysinfo" => Commands::Sysinfo {},
             "help" => {
                 usage(false);
             }
@@ -203,24 +207,47 @@ fn command_line() -> Commands {
     }
 }
 
-fn string_value(mut args: std::env::Args) -> (std::env::Args, Option<String>) {
-    if let Some(val) = args.next() {
-        (args, Some(val))
+fn bool_arg(arg: &str, _args: &[String], next: usize, opt_name: &str) -> Option<usize> {
+    if arg == opt_name {
+        Some(next)
     } else {
-        usage(true);
+        None
+    }
+}
+
+fn string_arg(arg: &str, args: &[String], next: usize, opt_name: &str) -> Option<(usize, String)> {
+    if arg == opt_name {
+        if next < args.len() {
+            Some((next + 1, args[next].to_string()))
+        } else {
+            None
+        }
+    } else if let Some((first, rest)) = arg.split_once('=') {
+        if first == opt_name {
+            Some((next, rest.to_string()))
+        } else {
+            None
+        }
+    } else {
+        None
     }
 }
 
-fn parsed_value<T: std::str::FromStr>(mut args: std::env::Args) -> (std::env::Args, Option<T>) {
-    if let Some(val) = args.next() {
-        match val.parse::<T>() {
-            Ok(value) => (args, Some(value)),
+fn numeric_arg<T: std::str::FromStr>(
+    arg: &str,
+    args: &[String],
+    next: usize,
+    opt_name: &str,
+) -> Option<(usize, T)> {
+    if let Some((next, strval)) = string_arg(arg, args, next, opt_name) {
+        match strval.parse::<T>() {
+            Ok(value) => Some((next, value)),
             _ => {
                 usage(true);
             }
         }
     } else {
-        usage(true);
+        None
     }
 }
 

diff --git a/src/nvidia.rs b/src/nvidia.rs
@@ -238,8 +238,10 @@ fn parse_pmon_output(raw_text: &str, user_by_pid: &UserTable) -> Result<Vec<gpu:
 
 const NVIDIA_QUERY_COMMAND: &str = "nvidia-smi";
 
-const NVIDIA_QUERY_ARGS: &[&str] =
-    &["--query-compute-apps=pid,used_memory", "--format=csv,noheader,nounits"];
+const NVIDIA_QUERY_ARGS: &[&str] = &[
+    "--query-compute-apps=pid,used_memory",
+    "--format=csv,noheader,nounits",
+];
 
 // Same signature as extract_nvidia_pmon_processes(), q.v. but user is always "_zombie_<PID>" and
 // command is always "_unknown_".  Only pids not in user_by_pid are returned.